airbyte-cdk 6.13.1.dev4106__py3-none-any.whl → 6.13.1.dev4108__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,16 +42,32 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
42
42
  unstructured_partition_pdf = None
43
43
  unstructured_partition_docx = None
44
44
  unstructured_partition_pptx = None
45
- nltk_data_dir = "/tmp/nltk_data"
45
+
46
+
47
+ def get_ntlk_temp_folder() -> str:
48
+ """
49
+ For non-root connectors /tmp is not currently writable, but we should allow it in the future.
50
+ It's safe to use /airbyte for now. Fallback to /tmp for local development.
51
+ """
52
+ try:
53
+ nltk_data_dir = "/airbyte/nltk_data"
54
+ os.makedirs(nltk_data_dir, exist_ok=True)
55
+ except OSError:
56
+ nltk_data_dir = "/tmp/nltk_data"
57
+ os.makedirs(nltk_data_dir, exist_ok=True)
58
+ return nltk_data_dir
59
+
46
60
 
47
61
  try:
48
- os.makedirs(nltk_data_dir, exist_ok=True)
62
+ nltk_data_dir = get_ntlk_temp_folder()
49
63
  nltk.data.path.append(nltk_data_dir)
50
64
  nltk.data.find("tokenizers/punkt.zip")
51
65
  nltk.data.find("tokenizers/punkt_tab.zip")
66
+ nltk.data.find("tokenizers/averaged_perceptron_tagger_eng.zip")
52
67
  except LookupError:
53
68
  nltk.download("punkt", download_dir=nltk_data_dir)
54
69
  nltk.download("punkt_tab", download_dir=nltk_data_dir)
70
+ nltk.download("averaged_perceptron_tagger_eng", download_dir=nltk_data_dir)
55
71
 
56
72
 
57
73
  def optional_decode(contents: Union[str, bytes]) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.13.1.dev4106
3
+ Version: 6.13.1.dev4108
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -217,7 +217,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs
217
217
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
218
218
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
219
219
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
220
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=vypnocjZkMIkc84XeYtTMO0Vy70wXwHZ0Ug3B4sD_dM,18788
220
+ airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=FOZL-RiXc7sndfdYZtLVSR93c_eHlKaS_nv2KrqFu2E,19371
221
221
  airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
222
222
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
223
223
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
@@ -342,8 +342,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
342
342
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
343
343
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
344
344
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
345
- airbyte_cdk-6.13.1.dev4106.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
346
- airbyte_cdk-6.13.1.dev4106.dist-info/METADATA,sha256=zMtiITJXfqZ91Kxu5yHIvi1S4rXhdN_ktOCOi7-pC30,6008
347
- airbyte_cdk-6.13.1.dev4106.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
348
- airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
349
- airbyte_cdk-6.13.1.dev4106.dist-info/RECORD,,
345
+ airbyte_cdk-6.13.1.dev4108.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
346
+ airbyte_cdk-6.13.1.dev4108.dist-info/METADATA,sha256=4MmYepPLObB4KNT5D7ttKKhwRdVaXdJnZMcOLewYXOU,6008
347
+ airbyte_cdk-6.13.1.dev4108.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
348
+ airbyte_cdk-6.13.1.dev4108.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
349
+ airbyte_cdk-6.13.1.dev4108.dist-info/RECORD,,