farahidi 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- farahidi-0.3.0/.gitignore +24 -0
- farahidi-0.3.0/LICENSE +674 -0
- farahidi-0.3.0/NOTICE +28 -0
- farahidi-0.3.0/PKG-INFO +191 -0
- farahidi-0.3.0/README.md +162 -0
- farahidi-0.3.0/pyproject.toml +74 -0
- farahidi-0.3.0/src/farahidi/__init__.py +57 -0
- farahidi-0.3.0/src/farahidi/analyzer.py +728 -0
- farahidi-0.3.0/src/farahidi/cli.py +184 -0
- farahidi-0.3.0/src/farahidi/clitics.py +150 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Clitics.Enclitics.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Clitics.Proclitics.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.CaseOrMood.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Formulas.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Definit.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Gender.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Main.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.NbRoot.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Number.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Number2.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.Type.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.PartOfSpeech.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Lemmas.Voweled.Canonic.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Lemmas.Voweled.Diac.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Stems.Unvoweled.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Stems.Voweled.Canonic.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Patterns.Stems.Voweled.Diac.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.Quadriliteral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.Trilateral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.id.Quadriliteral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Nouns.Roots.id.Trilateral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.CaseOrMood.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Formulas.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Augmented.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Emphasized.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Emphasized2.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Main.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.NbRoot.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Person.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Person2.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Transitivity.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Type.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.Voice.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.PartOfSpeech.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Lemmas.Voweled.Canonic.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Lemmas.Voweled.Diac.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Stems.Unvoweled.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Stems.Voweled.Canonic.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Patterns.Stems.Voweled.Diac.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.Quadriliteral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.Trilateral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.id.Quadriliteral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.id.Quadriliteral2.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Derived.Verbs.Roots.id.Trilateral.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Exceptional.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.MSA-LEMMA.ALL-train.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.MSA.ALL.TRAIN.141809.lm.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.MSA.SHA.ROOT.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.MSA.SHA.STEM.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.CaseOrMood.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.PartOfSpeech.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.Unvoweled.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Propernoun.Voweled.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Toolwords.PartOfSpeech.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Toolwords.Unvoweled.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.NonDerived.Toolwords.Voweled.list.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/DATA.Root.map.jsonl.gz +0 -0
- farahidi-0.3.0/src/farahidi/data/MANIFEST.txt +58 -0
- farahidi-0.3.0/src/farahidi/disambiguate.py +238 -0
- farahidi-0.3.0/src/farahidi/lexicon.py +192 -0
- farahidi-0.3.0/src/farahidi/lm.py +133 -0
- farahidi-0.3.0/src/farahidi/models.py +54 -0
- farahidi-0.3.0/src/farahidi/normalize.py +388 -0
- farahidi-0.3.0/src/farahidi/pos.py +73 -0
- farahidi-0.3.0/src/farahidi/translit.py +86 -0
- farahidi-0.3.0/src/farahidi/vowelize.py +89 -0
- farahidi-0.3.0/tests/fixtures/golden.jsonl +376 -0
- farahidi-0.3.0/tests/fixtures/sentences.jsonl +131 -0
- farahidi-0.3.0/tests/test_cli.py +105 -0
- farahidi-0.3.0/tests/test_clitics.py +47 -0
- farahidi-0.3.0/tests/test_disambiguate.py +50 -0
- farahidi-0.3.0/tests/test_golden.py +86 -0
- farahidi-0.3.0/tests/test_normalize.py +59 -0
- farahidi-0.3.0/tools/AlkhalilGolden.java +68 -0
- farahidi-0.3.0/tools/AlkhalilSentenceGolden.java +97 -0
- farahidi-0.3.0/tools/build_data.py +115 -0
- farahidi-0.3.0/tools/gen_golden.py +78 -0
- farahidi-0.3.0/tools/sentences.txt +131 -0
- farahidi-0.3.0/tools/wordlist.txt +376 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
|
|
11
|
+
# uv
|
|
12
|
+
.uv/
|
|
13
|
+
|
|
14
|
+
# tooling caches
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.mypy_cache/
|
|
18
|
+
|
|
19
|
+
# OS
|
|
20
|
+
.DS_Store
|
|
21
|
+
|
|
22
|
+
# golden-harness build artifacts (regenerated from the parent repo)
|
|
23
|
+
tools/build/
|
|
24
|
+
tools/*.class
|