farahidi 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. farahidi-0.3.0/.gitignore +24 -0
  2. farahidi-0.3.0/LICENSE +674 -0
  3. farahidi-0.3.0/NOTICE +28 -0
  4. farahidi-0.3.0/PKG-INFO +191 -0
  5. farahidi-0.3.0/README.md +162 -0
  6. farahidi-0.3.0/pyproject.toml +74 -0
  7. farahidi-0.3.0/src/farahidi/__init__.py +57 -0
  8. farahidi-0.3.0/src/farahidi/analyzer.py +728 -0
  9. farahidi-0.3.0/src/farahidi/cli.py +184 -0
  10. farahidi-0.3.0/src/farahidi/clitics.py +150 -0
  11. farahidi-0.3.0/src/farahidi/data/DATA.Clitics.Enclitics.map.jsonl.gz +0 -0
  12. farahidi-0.3.0/src/farahidi/data/DATA.Clitics.Proclitics.map.jsonl.gz +0 -0
  13. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.CaseOrMood.list.jsonl.gz +0 -0
  14. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Formulas.map.jsonl.gz +0 -0
  15. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Definit.list.jsonl.gz +0 -0
  16. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Gender.list.jsonl.gz +0 -0
  17. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Main.list.jsonl.gz +0 -0
  18. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.NbRoot.list.jsonl.gz +0 -0
  19. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Number.list.jsonl.gz +0 -0
  20. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Number2.list.jsonl.gz +0 -0
  21. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Type.list.jsonl.gz +0 -0
  22. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.list.jsonl.gz +0 -0
  23. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Lemmas.Voweled.Canonic.map.jsonl.gz +0 -0
  24. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Lemmas.Voweled.Diac.map.jsonl.gz +0 -0
  25. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Stems.Unvoweled.map.jsonl.gz +0 -0
  26. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Stems.Voweled.Canonic.map.jsonl.gz +0 -0
  27. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Stems.Voweled.Diac.map.jsonl.gz +0 -0
  28. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.Quadriliteral.map.jsonl.gz +0 -0
  29. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.Trilateral.map.jsonl.gz +0 -0
  30. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.id.Quadriliteral.map.jsonl.gz +0 -0
  31. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.id.Trilateral.map.jsonl.gz +0 -0
  32. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.CaseOrMood.list.jsonl.gz +0 -0
  33. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Formulas.map.jsonl.gz +0 -0
  34. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Augmented.list.jsonl.gz +0 -0
  35. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Emphasized.list.jsonl.gz +0 -0
  36. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Emphasized2.list.jsonl.gz +0 -0
  37. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Main.list.jsonl.gz +0 -0
  38. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.NbRoot.list.jsonl.gz +0 -0
  39. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Person.list.jsonl.gz +0 -0
  40. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Person2.list.jsonl.gz +0 -0
  41. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Transitivity.list.jsonl.gz +0 -0
  42. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Type.list.jsonl.gz +0 -0
  43. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Voice.list.jsonl.gz +0 -0
  44. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.list.jsonl.gz +0 -0
  45. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Lemmas.Voweled.Canonic.map.jsonl.gz +0 -0
  46. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Lemmas.Voweled.Diac.map.jsonl.gz +0 -0
  47. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Stems.Unvoweled.map.jsonl.gz +0 -0
  48. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Stems.Voweled.Canonic.map.jsonl.gz +0 -0
  49. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Stems.Voweled.Diac.map.jsonl.gz +0 -0
  50. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.Quadriliteral.map.jsonl.gz +0 -0
  51. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.Trilateral.map.jsonl.gz +0 -0
  52. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.id.Quadriliteral.map.jsonl.gz +0 -0
  53. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.id.Quadriliteral2.map.jsonl.gz +0 -0
  54. farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.id.Trilateral.map.jsonl.gz +0 -0
  55. farahidi-0.3.0/src/farahidi/data/DATA.Exceptional.map.jsonl.gz +0 -0
  56. farahidi-0.3.0/src/farahidi/data/DATA.MSA-LEMMA.ALL-train.map.jsonl.gz +0 -0
  57. farahidi-0.3.0/src/farahidi/data/DATA.MSA.ALL.TRAIN.141809.lm.gz +0 -0
  58. farahidi-0.3.0/src/farahidi/data/DATA.MSA.SHA.ROOT.map.jsonl.gz +0 -0
  59. farahidi-0.3.0/src/farahidi/data/DATA.MSA.SHA.STEM.map.jsonl.gz +0 -0
  60. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.CaseOrMood.list.jsonl.gz +0 -0
  61. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.PartOfSpeech.list.jsonl.gz +0 -0
  62. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.Unvoweled.map.jsonl.gz +0 -0
  63. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.Voweled.list.jsonl.gz +0 -0
  64. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Toolwords.PartOfSpeech.list.jsonl.gz +0 -0
  65. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Toolwords.Unvoweled.map.jsonl.gz +0 -0
  66. farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Toolwords.Voweled.list.jsonl.gz +0 -0
  67. farahidi-0.3.0/src/farahidi/data/DATA.Root.map.jsonl.gz +0 -0
  68. farahidi-0.3.0/src/farahidi/data/MANIFEST.txt +58 -0
  69. farahidi-0.3.0/src/farahidi/disambiguate.py +238 -0
  70. farahidi-0.3.0/src/farahidi/lexicon.py +192 -0
  71. farahidi-0.3.0/src/farahidi/lm.py +133 -0
  72. farahidi-0.3.0/src/farahidi/models.py +54 -0
  73. farahidi-0.3.0/src/farahidi/normalize.py +388 -0
  74. farahidi-0.3.0/src/farahidi/pos.py +73 -0
  75. farahidi-0.3.0/src/farahidi/translit.py +86 -0
  76. farahidi-0.3.0/src/farahidi/vowelize.py +89 -0
  77. farahidi-0.3.0/tests/fixtures/golden.jsonl +376 -0
  78. farahidi-0.3.0/tests/fixtures/sentences.jsonl +131 -0
  79. farahidi-0.3.0/tests/test_cli.py +105 -0
  80. farahidi-0.3.0/tests/test_clitics.py +47 -0
  81. farahidi-0.3.0/tests/test_disambiguate.py +50 -0
  82. farahidi-0.3.0/tests/test_golden.py +86 -0
  83. farahidi-0.3.0/tests/test_normalize.py +59 -0
  84. farahidi-0.3.0/tools/AlkhalilGolden.java +68 -0
  85. farahidi-0.3.0/tools/AlkhalilSentenceGolden.java +97 -0
  86. farahidi-0.3.0/tools/build_data.py +115 -0
  87. farahidi-0.3.0/tools/gen_golden.py +78 -0
  88. farahidi-0.3.0/tools/sentences.txt +131 -0
  89. farahidi-0.3.0/tools/wordlist.txt +376 -0
@@ -0,0 +1,24 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ .venv/
9
+ venv/
10
+
11
+ # uv
12
+ .uv/
13
+
14
+ # tooling caches
15
+ .pytest_cache/
16
+ .ruff_cache/
17
+ .mypy_cache/
18
+
19
+ # OS
20
+ .DS_Store
21
+
22
+ # golden-harness build artifacts (regenerated from the parent repo)
23
+ tools/build/
24
+ tools/*.class