airbyte-cdk 6.13.1.dev4106__py3-none-any.whl → 6.13.1.dev4108__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -42,16 +42,32 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
42
42
  unstructured_partition_pdf = None
43
43
  unstructured_partition_docx = None
44
44
  unstructured_partition_pptx = None
45
- nltk_data_dir = "/tmp/nltk_data"
45
+
46
+
47
+ def get_ntlk_temp_folder() -> str:
48
+ """
49
+ For non-root connectors /tmp is not currently writable, but we should allow it in the future.
50
+ It's safe to use /airbyte for now. Fallback to /tmp for local development.
51
+ """
52
+ try:
53
+ nltk_data_dir = "/airbyte/nltk_data"
54
+ os.makedirs(nltk_data_dir, exist_ok=True)
55
+ except OSError:
56
+ nltk_data_dir = "/tmp/nltk_data"
57
+ os.makedirs(nltk_data_dir, exist_ok=True)
58
+ return nltk_data_dir
59
+
46
60
 
47
61
  try:
48
- os.makedirs(nltk_data_dir, exist_ok=True)
62
+ nltk_data_dir = get_ntlk_temp_folder()
49
63
  nltk.data.path.append(nltk_data_dir)
50
64
  nltk.data.find("tokenizers/punkt.zip")
51
65
  nltk.data.find("tokenizers/punkt_tab.zip")
66
+ nltk.data.find("tokenizers/averaged_perceptron_tagger_eng.zip")
52
67
  except LookupError:
53
68
  nltk.download("punkt", download_dir=nltk_data_dir)
54
69
  nltk.download("punkt_tab", download_dir=nltk_data_dir)
70
+ nltk.download("averaged_perceptron_tagger_eng", download_dir=nltk_data_dir)
55
71
 
56
72
 
57
73
  def optional_decode(contents: Union[str, bytes]) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.13.1.dev4106
3
+ Version: 6.13.1.dev4108
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -217,7 +217,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs
217
217
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
218
218
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
219
219
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
220
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=vypnocjZkMIkc84XeYtTMO0Vy70wXwHZ0Ug3B4sD_dM,18788
220
+ airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=FOZL-RiXc7sndfdYZtLVSR93c_eHlKaS_nv2KrqFu2E,19371
221
221
  airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
222
222
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
223
223
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
@@ -342,8 +342,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
342
342
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
343
343
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
344
344
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
345
- airbyte_cdk-6.13.1.dev4106.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
346
- airbyte_cdk-6.13.1.dev4106.dist-info/METADATA,sha256=zMtiITJXfqZ91Kxu5yHIvi1S4rXhdN_ktOCOi7-pC30,6008
347
- airbyte_cdk-6.13.1.dev4106.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
348
- airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
349
- airbyte_cdk-6.13.1.dev4106.dist-info/RECORD,,
345
+ airbyte_cdk-6.13.1.dev4108.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
346
+ airbyte_cdk-6.13.1.dev4108.dist-info/METADATA,sha256=4MmYepPLObB4KNT5D7ttKKhwRdVaXdJnZMcOLewYXOU,6008
347
+ airbyte_cdk-6.13.1.dev4108.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
348
+ airbyte_cdk-6.13.1.dev4108.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
349
+ airbyte_cdk-6.13.1.dev4108.dist-info/RECORD,,