data-prep-connector 0.2.3.dev0__py3-none-any.whl → 0.2.4.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: data_prep_connector
3
- Version: 0.2.3.dev0
3
+ Version: 0.2.4.dev1
4
4
  Summary: Scalable and Compliant Web Crawler
5
5
  Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
6
6
  License: Apache-2.0
7
7
  Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
8
- Requires-Python: <3.13,>=3.10
8
+ Requires-Python: <3.14,>=3.10
9
9
  Description-Content-Type: text/markdown
10
- Requires-Dist: scrapy>=2.11.2
10
+ Requires-Dist: scrapy==2.12.0
11
11
  Requires-Dist: pydantic>=2.8.1
12
12
  Requires-Dist: tldextract>=5.1.2
13
13
  Provides-Extra: dev
@@ -0,0 +1,15 @@
1
+ dpk_connector/__init__.py,sha256=IsuvsEgr98JsjrRQ0yCeddticEwRoLpgl6nxylDIoBM,773
2
+ dpk_connector/core/__init__.py,sha256=6bsB2K0dllWaJ4gdyVp3id6F26BspNBbvqRi9X598C8,696
3
+ dpk_connector/core/crawler.py,sha256=tZji56RrUI7YQ_rbv3cGjCfkZFqelMT-F23E_Mn6NHk,13574
4
+ dpk_connector/core/item.py,sha256=aJ-cis6s4F1lEqISVf7lILFF9KVUrebunUPIz1XcOTo,879
5
+ dpk_connector/core/logging.py,sha256=ML1Pl4ClX5HGqn2d25dNdtFGGk_lR4MBjnLECphCLY0,1019
6
+ dpk_connector/core/middlewares.py,sha256=tr4Go4b2bENM9xewuvp81jL2zQEsZYL_ggBtEFm54OM,9980
7
+ dpk_connector/core/pipelines.py,sha256=nm1fl7gLonR2FxEexyRfXJcMbvdNphZ-9lPrXrK5o5U,1153
8
+ dpk_connector/core/settings.py,sha256=kw12cE8SWMp0CC9m7RIkk1jnpEeKVHj4ZkcSXFslUjc,2031
9
+ dpk_connector/core/utils.py,sha256=CBQWAmiZKasKLAMqxqL2jKniPOF7IOJiRC5KOssN6kM,3072
10
+ dpk_connector/core/spiders/__init__.py,sha256=6bsB2K0dllWaJ4gdyVp3id6F26BspNBbvqRi9X598C8,696
11
+ dpk_connector/core/spiders/sitemap.py,sha256=v5nTPUrT5jun6vIXvo5Des5T_kgtBJjOXfOxQbXDYkA,12638
12
+ data_prep_connector-0.2.4.dev1.dist-info/METADATA,sha256=X7V13zoZyHyntLCO6GJ6Q1x4gycDb9KEZovFbRE5GF0,1839
13
+ data_prep_connector-0.2.4.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ data_prep_connector-0.2.4.dev1.dist-info/top_level.txt,sha256=V5veaYVXWTfjj98ntRCsHK7A36nzNprbMwB8PRrtsN4,14
15
+ data_prep_connector-0.2.4.dev1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
dpk_connector/__init__.py CHANGED
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,15 +0,0 @@
1
- dpk_connector/__init__.py,sha256=xG6Sve8_Vf1RI0uLDIxEMrFM62TUxeTkuYVPPADqETQ,735
2
- dpk_connector/core/__init__.py,sha256=WrQMZyFE3Gn6fT7oHmL9zBYpJ9lI9j-PpJBqE_a6Zww,658
3
- dpk_connector/core/crawler.py,sha256=01-GpJg2x6A2F5GQHM5avCJrjr16ECF-D4ekqTtnkE8,13536
4
- dpk_connector/core/item.py,sha256=MZRTwhJJupkC_oegEfzrb-YsWP0TRv09Y2rwEv71uII,841
5
- dpk_connector/core/logging.py,sha256=aV1SNJUPgJuoiZ6wwlZcHTHigLB0vRDT2UfM0RWeWW4,981
6
- dpk_connector/core/middlewares.py,sha256=dB44kOG1wU1yCp7zNxe66DB37rTmYnsQokv99Bng-8k,9942
7
- dpk_connector/core/pipelines.py,sha256=W3EYF6l8hyV2FccJ2Mj2FL28RUtQoHKqSps-SYV1Lpo,1115
8
- dpk_connector/core/settings.py,sha256=62NtwqHo6BELfGgR9mXFEMuTlYdKrppGKgCQHzLl9ek,1993
9
- dpk_connector/core/utils.py,sha256=O6MI9Gz6TvncTv0isaxIvE29q-CnXLTN3cx4abEG2VE,3034
10
- dpk_connector/core/spiders/__init__.py,sha256=WrQMZyFE3Gn6fT7oHmL9zBYpJ9lI9j-PpJBqE_a6Zww,658
11
- dpk_connector/core/spiders/sitemap.py,sha256=SYT89P3V2QpHvE_PuEdBJlabKCswi_0W6A4sOqOnvXc,12600
12
- data_prep_connector-0.2.3.dev0.dist-info/METADATA,sha256=fXiPSnYGe-Au5uaAhvbnzX9RAHtkw94t2Ao-OHkjlvs,1839
13
- data_prep_connector-0.2.3.dev0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
14
- data_prep_connector-0.2.3.dev0.dist-info/top_level.txt,sha256=V5veaYVXWTfjj98ntRCsHK7A36nzNprbMwB8PRrtsN4,14
15
- data_prep_connector-0.2.3.dev0.dist-info/RECORD,,