dalla-data-processing 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/PKG-INFO +5 -6
  2. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/_version.py +3 -3
  3. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/cli.py +9 -1
  4. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/quality/checker.py +20 -0
  5. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/stemming/stemmer.py +22 -9
  6. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing.egg-info/PKG-INFO +5 -6
  7. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing.egg-info/requires.txt +3 -4
  8. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/pyproject.toml +6 -5
  9. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/.dockerignore +0 -0
  10. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/.github/workflows/ci.yml +0 -0
  11. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/.github/workflows/release.yml +0 -0
  12. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/.gitignore +0 -0
  13. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/.pre-commit-config.yaml +0 -0
  14. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/MANIFEST.in +0 -0
  15. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/README.md +0 -0
  16. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/__init__.py +0 -0
  17. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/core/README.md +0 -0
  18. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/core/__init__.py +0 -0
  19. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/core/dataset.py +0 -0
  20. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/core/parallel.py +0 -0
  21. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/README.md +0 -0
  22. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/__init__.py +0 -0
  23. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/bin/.gitignore +0 -0
  24. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/COPYING +0 -0
  25. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/Makefile +0 -0
  26. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/Makefile.config +0 -0
  27. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/README.md +0 -0
  28. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/Makefile +0 -0
  29. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/Makefile.g +0 -0
  30. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/buzhash.c +0 -0
  31. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/buzhash.h +0 -0
  32. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/hashdup.c +0 -0
  33. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/hashgen.c +0 -0
  34. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/onion +0 -0
  35. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/onion.c +0 -0
  36. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/onion_dup.c +0 -0
  37. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/version.c +0 -0
  38. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src/version.h +0 -0
  39. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/.gitignore +0 -0
  40. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/Makefile +0 -0
  41. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/Makefile.g +0 -0
  42. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/buzhash.c +0 -0
  43. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/buzhash.h +0 -0
  44. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/hashdup +0 -0
  45. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/hashdup.c +0 -0
  46. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/hashgen +0 -0
  47. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/hashgen.c +0 -0
  48. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/onion.c +0 -0
  49. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/onion_dup.c +0 -0
  50. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/version.c +0 -0
  51. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion/src_sc/version.h +0 -0
  52. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/onion_wrapper.py +0 -0
  53. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/postprocessing.py +0 -0
  54. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/deduplication/preprocessing.py +0 -0
  55. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/packing/README.md +0 -0
  56. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/packing/__init__.py +0 -0
  57. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/packing/dataset_packer.py +0 -0
  58. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/packing/pack_config.example.yaml +0 -0
  59. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/quality/README.md +0 -0
  60. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/quality/__init__.py +0 -0
  61. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/readability/README.md +0 -0
  62. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/readability/__init__.py +0 -0
  63. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/readability/ranking.py +0 -0
  64. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/readability/scorer.py +0 -0
  65. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/stemming/README.md +0 -0
  66. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/stemming/__init__.py +0 -0
  67. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/stemming/data/words_al.txt +0 -0
  68. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/stemming/data/words_al_t.txt +0 -0
  69. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/stemming/data/words_t.txt +0 -0
  70. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/utils/__init__.py +0 -0
  71. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/utils/logger.py +0 -0
  72. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing/utils/tokenize.py +0 -0
  73. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing.egg-info/SOURCES.txt +0 -0
  74. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing.egg-info/dependency_links.txt +0 -0
  75. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing.egg-info/entry_points.txt +0 -0
  76. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/dalla_data_processing.egg-info/top_level.txt +0 -0
  77. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/scripts/build_onion.sh +0 -0
  78. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/scripts/release.sh +0 -0
  79. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/setup.cfg +0 -0
  80. {dalla_data_processing-0.0.2 → dalla_data_processing-0.0.3}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dalla-data-processing
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: data processing pipeline with deduplication, stemming, quality checking, and readability scoring, used for the DALLA Models
5
5
  Author-email: Hadi Hamoud <hhamoud@dohainstitute.edu.qa>, Digital Research Unit - Arab Center <dru@dohainstitute.edu.qa>
6
6
  Project-URL: Homepage, https://github.com/U4RASD/dalla-data-processing
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
16
  Classifier: Topic :: Text Processing :: Linguistic
17
- Requires-Python: >=3.12
17
+ Requires-Python: <3.13,>=3.12
18
18
  Description-Content-Type: text/markdown
19
19
  Requires-Dist: datasets>=2.14.0
20
20
  Requires-Dist: transformers>=4.30.0
@@ -28,18 +28,17 @@ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
28
28
  Requires-Dist: ruff>=0.1.0; extra == "dev"
29
29
  Requires-Dist: pre-commit>=3.0.0; extra == "dev"
30
30
  Provides-Extra: dedup
31
- Requires-Dist: camel-tools>=1.5.0; extra == "dedup"
31
+ Requires-Dist: camel-tools==1.5.7; extra == "dedup"
32
32
  Provides-Extra: dedup-native
33
33
  Requires-Dist: cffi>=1.15.0; extra == "dedup-native"
34
34
  Provides-Extra: stem
35
- Requires-Dist: camel-tools>=1.5.0; extra == "stem"
35
+ Requires-Dist: camel-tools==1.5.7; extra == "stem"
36
36
  Provides-Extra: quality
37
- Requires-Dist: camel-tools>=1.5.0; extra == "quality"
37
+ Requires-Dist: camel-tools==1.5.7; extra == "quality"
38
38
  Provides-Extra: readability
39
39
  Requires-Dist: textstat>=0.7.0; extra == "readability"
40
40
  Provides-Extra: pack
41
41
  Requires-Dist: sentencepiece>=0.2.0; extra == "pack"
42
- Requires-Dist: rbpe; extra == "pack"
43
42
  Requires-Dist: pyyaml; extra == "pack"
44
43
  Provides-Extra: all
45
44
  Requires-Dist: dalla-data-processing[dedup,dedup-native,dev,pack,quality,readability,stem]; extra == "all"
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.0.2'
32
- __version_tuple__ = version_tuple = (0, 0, 2)
31
+ __version__ = version = '0.0.3'
32
+ __version_tuple__ = version_tuple = (0, 0, 3)
33
33
 
34
- __commit_id__ = commit_id = 'g68cee46a4'
34
+ __commit_id__ = commit_id = 'g37580acc9'
@@ -651,7 +651,15 @@ def pack(
651
651
  tokenizer = RBPETokenizer.from_pretrained(config_data["tokenizer_path"])
652
652
  except ImportError:
653
653
  logger.error("Missing rbpe package")
654
- logger.error("Install with: pip install rbpe")
654
+ logger.error(
655
+ "rbpe is not included in the default installation due to "
656
+ "dependency conflicts with camel-tools (transformers version requirements)"
657
+ )
658
+ logger.error("Install separately with: pip install rbpe")
659
+ logger.error(
660
+ "Note: Installing rbpe may require a separate environment "
661
+ "if you also use dedup/stem/quality features"
662
+ )
655
663
  sys.exit(1)
656
664
  else:
657
665
  try:
@@ -11,6 +11,7 @@ from concurrent.futures import TimeoutError as FutureTimeoutError
11
11
  from types import MethodType
12
12
  from typing import Any
13
13
 
14
+ from camel_tools.data.catalogue import Catalogue
14
15
  from camel_tools.disambig.bert import BERTUnfactoredDisambiguator
15
16
  from camel_tools.disambig.mle import MLEDisambiguator
16
17
  from datasets import Dataset
@@ -53,6 +54,25 @@ class QualityChecker:
53
54
 
54
55
  def _init_disambiguator(self):
55
56
  """Initialize and configure the disambiguator with caching."""
57
+ # Install required CAMeL Tools packages based on model type
58
+ logger.info("Checking CAMeL Tools data packages...")
59
+ catalogue = Catalogue.load_catalogue()
60
+
61
+ try:
62
+ catalogue.download_package("morphology-db-msa-r13")
63
+ catalogue.download_package("disambig-mle-calima-msa-r13")
64
+ logger.info("msa-r13 packages installed")
65
+ except Exception as e:
66
+ logger.warning(f"Package installation warning: {e}")
67
+
68
+ # Install BERT package if using BERT model
69
+ if self.model == "bert":
70
+ try:
71
+ catalogue.download_package("disambig-bert-unfactored-all")
72
+ logger.info("BERT package installed")
73
+ except Exception as e:
74
+ logger.warning(f"BERT package installation warning: {e}")
75
+
56
76
  if self.model == "mle":
57
77
  self.disambiguator = MLEDisambiguator.pretrained()
58
78
  logger.info("MLE disambiguator loaded")
@@ -473,12 +473,19 @@ def stem_dataset(
473
473
  catalogue = Catalogue.load_catalogue()
474
474
  try:
475
475
  catalogue.download_package("morphology-db-msa-r13")
476
- if model == "mle":
477
- catalogue.download_package("disambig-mle-calima-msa-r13")
478
- # For BERT, let it download automatically when pretrained() is called
479
- logger.info("CAMeL Tools data packages ready")
476
+ catalogue.download_package("disambig-mle-calima-msa-r13")
477
+ logger.info("msa-r13 packages installed")
480
478
  except Exception as e:
481
- logger.warning(f"Could not verify CAMeL packages: {e}")
479
+ logger.warning(f"Package installation warning: {e}")
480
+
481
+ if model == "bert":
482
+ try:
483
+ catalogue.download_package("disambig-bert-unfactored-all")
484
+ logger.info("BERT package installed")
485
+ except Exception as e:
486
+ logger.warning(f"BERT package installation warning: {e}")
487
+
488
+ logger.info("CAMeL Tools data packages ready")
482
489
 
483
490
  logger.info("Loading additional words lists...")
484
491
  words_dir = os.path.join(os.path.dirname(__file__), "data")
@@ -597,15 +604,21 @@ def stem(
597
604
  if not all(isinstance(t, str) for t in text_list):
598
605
  raise TypeError("All items in text list must be strings")
599
606
 
600
- # Initialize disambiguator (cached globally if possible)
601
607
  logger.info(f"Initializing {model.upper()} disambiguator...")
602
608
  catalogue = Catalogue.load_catalogue()
603
609
  try:
604
610
  catalogue.download_package("morphology-db-msa-r13")
605
- if model == "mle":
606
- catalogue.download_package("disambig-mle-calima-msa-r13")
611
+ catalogue.download_package("disambig-mle-calima-msa-r13")
612
+ logger.info("msa-r13 packages installed")
607
613
  except Exception as e:
608
- logger.warning(f"Could not verify CAMeL packages: {e}")
614
+ logger.warning(f"Package installation warning: {e}")
615
+
616
+ if model == "bert":
617
+ try:
618
+ catalogue.download_package("disambig-bert-unfactored-all")
619
+ logger.info("BERT package installed")
620
+ except Exception as e:
621
+ logger.warning(f"BERT package installation warning: {e}")
609
622
 
610
623
  if model == "mle":
611
624
  disambiguator = MLEDisambiguator.pretrained("calima-msa-r13", cache_size=1_000_000)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dalla-data-processing
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: data processing pipeline with deduplication, stemming, quality checking, and readability scoring, used for the DALLA Models
5
5
  Author-email: Hadi Hamoud <hhamoud@dohainstitute.edu.qa>, Digital Research Unit - Arab Center <dru@dohainstitute.edu.qa>
6
6
  Project-URL: Homepage, https://github.com/U4RASD/dalla-data-processing
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
16
  Classifier: Topic :: Text Processing :: Linguistic
17
- Requires-Python: >=3.12
17
+ Requires-Python: <3.13,>=3.12
18
18
  Description-Content-Type: text/markdown
19
19
  Requires-Dist: datasets>=2.14.0
20
20
  Requires-Dist: transformers>=4.30.0
@@ -28,18 +28,17 @@ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
28
28
  Requires-Dist: ruff>=0.1.0; extra == "dev"
29
29
  Requires-Dist: pre-commit>=3.0.0; extra == "dev"
30
30
  Provides-Extra: dedup
31
- Requires-Dist: camel-tools>=1.5.0; extra == "dedup"
31
+ Requires-Dist: camel-tools==1.5.7; extra == "dedup"
32
32
  Provides-Extra: dedup-native
33
33
  Requires-Dist: cffi>=1.15.0; extra == "dedup-native"
34
34
  Provides-Extra: stem
35
- Requires-Dist: camel-tools>=1.5.0; extra == "stem"
35
+ Requires-Dist: camel-tools==1.5.7; extra == "stem"
36
36
  Provides-Extra: quality
37
- Requires-Dist: camel-tools>=1.5.0; extra == "quality"
37
+ Requires-Dist: camel-tools==1.5.7; extra == "quality"
38
38
  Provides-Extra: readability
39
39
  Requires-Dist: textstat>=0.7.0; extra == "readability"
40
40
  Provides-Extra: pack
41
41
  Requires-Dist: sentencepiece>=0.2.0; extra == "pack"
42
- Requires-Dist: rbpe; extra == "pack"
43
42
  Requires-Dist: pyyaml; extra == "pack"
44
43
  Provides-Extra: all
45
44
  Requires-Dist: dalla-data-processing[dedup,dedup-native,dev,pack,quality,readability,stem]; extra == "all"
@@ -9,7 +9,7 @@ structlog>=24.0.0
9
9
  dalla-data-processing[dedup,dedup-native,dev,pack,quality,readability,stem]
10
10
 
11
11
  [dedup]
12
- camel-tools>=1.5.0
12
+ camel-tools==1.5.7
13
13
 
14
14
  [dedup-native]
15
15
  cffi>=1.15.0
@@ -22,14 +22,13 @@ pre-commit>=3.0.0
22
22
 
23
23
  [pack]
24
24
  sentencepiece>=0.2.0
25
- rbpe
26
25
  pyyaml
27
26
 
28
27
  [quality]
29
- camel-tools>=1.5.0
28
+ camel-tools==1.5.7
30
29
 
31
30
  [readability]
32
31
  textstat>=0.7.0
33
32
 
34
33
  [stem]
35
- camel-tools>=1.5.0
34
+ camel-tools==1.5.7
@@ -11,7 +11,7 @@ authors = [
11
11
  {name = "Digital Research Unit - Arab Center", email = "dru@dohainstitute.edu.qa"}
12
12
  ]
13
13
  readme = "README.md"
14
- requires-python = ">=3.12"
14
+ requires-python = ">=3.12,<3.13"
15
15
  keywords = ["arabic", "nlp", "data-processing", "deduplication", "stemming", "readability", "quality"]
16
16
  classifiers = [
17
17
  "Intended Audience :: Developers",
@@ -39,23 +39,24 @@ dev = [
39
39
  "pre-commit>=3.0.0",
40
40
  ]
41
41
  dedup = [
42
- "camel-tools>=1.5.0",
42
+ "camel-tools==1.5.7",
43
43
  ]
44
44
  dedup-native = [
45
45
  "cffi>=1.15.0",
46
46
  ]
47
47
  stem = [
48
- "camel-tools>=1.5.0",
48
+ "camel-tools==1.5.7",
49
49
  ]
50
50
  quality = [
51
- "camel-tools>=1.5.0",
51
+ "camel-tools==1.5.7",
52
52
  ]
53
53
  readability = [
54
54
  "textstat>=0.7.0",
55
55
  ]
56
56
  pack = [
57
57
  "sentencepiece>=0.2.0",
58
- "rbpe",
58
+ # "rbpe", # excluded due to transformers version conflict with camel-tools
59
+ # users should install separately if needed: pip install rbpe
59
60
  "pyyaml",
60
61
  ]
61
62
  all = [