airbyte-cdk 6.13.1.dev4106__py3-none-any.whl → 6.13.1.dev4108__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +18 -2
- {airbyte_cdk-6.13.1.dev4106.dist-info → airbyte_cdk-6.13.1.dev4108.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.13.1.dev4106.dist-info → airbyte_cdk-6.13.1.dev4108.dist-info}/RECORD +6 -6
- {airbyte_cdk-6.13.1.dev4106.dist-info → airbyte_cdk-6.13.1.dev4108.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.13.1.dev4106.dist-info → airbyte_cdk-6.13.1.dev4108.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.13.1.dev4106.dist-info → airbyte_cdk-6.13.1.dev4108.dist-info}/entry_points.txt +0 -0
@@ -42,16 +42,32 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
42
42
|
unstructured_partition_pdf = None
|
43
43
|
unstructured_partition_docx = None
|
44
44
|
unstructured_partition_pptx = None
|
45
|
-
|
45
|
+
|
46
|
+
|
47
|
+
def get_ntlk_temp_folder() -> str:
|
48
|
+
"""
|
49
|
+
For non-root connectors /tmp is not currently writable, but we should allow it in the future.
|
50
|
+
It's safe to use /airbyte for now. Fallback to /tmp for local development.
|
51
|
+
"""
|
52
|
+
try:
|
53
|
+
nltk_data_dir = "/airbyte/nltk_data"
|
54
|
+
os.makedirs(nltk_data_dir, exist_ok=True)
|
55
|
+
except OSError:
|
56
|
+
nltk_data_dir = "/tmp/nltk_data"
|
57
|
+
os.makedirs(nltk_data_dir, exist_ok=True)
|
58
|
+
return nltk_data_dir
|
59
|
+
|
46
60
|
|
47
61
|
try:
|
48
|
-
|
62
|
+
nltk_data_dir = get_ntlk_temp_folder()
|
49
63
|
nltk.data.path.append(nltk_data_dir)
|
50
64
|
nltk.data.find("tokenizers/punkt.zip")
|
51
65
|
nltk.data.find("tokenizers/punkt_tab.zip")
|
66
|
+
nltk.data.find("tokenizers/averaged_perceptron_tagger_eng.zip")
|
52
67
|
except LookupError:
|
53
68
|
nltk.download("punkt", download_dir=nltk_data_dir)
|
54
69
|
nltk.download("punkt_tab", download_dir=nltk_data_dir)
|
70
|
+
nltk.download("averaged_perceptron_tagger_eng", download_dir=nltk_data_dir)
|
55
71
|
|
56
72
|
|
57
73
|
def optional_decode(contents: Union[str, bytes]) -> str:
|
@@ -217,7 +217,7 @@ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=HyGRihJxcb_lEs
|
|
217
217
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
|
218
218
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
|
219
219
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
|
220
|
-
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=
|
220
|
+
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=FOZL-RiXc7sndfdYZtLVSR93c_eHlKaS_nv2KrqFu2E,19371
|
221
221
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
|
222
222
|
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
|
223
223
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
|
@@ -342,8 +342,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
342
342
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
343
343
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
344
344
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
345
|
-
airbyte_cdk-6.13.1.dev4106.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
346
|
-
airbyte_cdk-6.13.1.
|
347
|
-
airbyte_cdk-6.13.1.dev4106.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
348
|
-
airbyte_cdk-6.13.1.dev4106.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
349
|
-
airbyte_cdk-6.13.1.dev4106.dist-info/RECORD,,
|
345
|
+
airbyte_cdk-6.13.1.dev4108.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
346
|
+
airbyte_cdk-6.13.1.dev4108.dist-info/METADATA,sha256=4MmYepPLObB4KNT5D7ttKKhwRdVaXdJnZMcOLewYXOU,6008
|
347
|
+
airbyte_cdk-6.13.1.dev4108.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
348
|
+
airbyte_cdk-6.13.1.dev4108.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
349
|
+
airbyte_cdk-6.13.1.dev4108.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{airbyte_cdk-6.13.1.dev4106.dist-info → airbyte_cdk-6.13.1.dev4108.dist-info}/entry_points.txt
RENAMED
File without changes
|