data-prep-connector 0.2.3.dev0__tar.gz → 0.2.4.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/PKG-INFO +4 -4
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/doc/overview.md +1 -1
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/pyproject.toml +10 -7
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/PKG-INFO +4 -4
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/requires.txt +1 -1
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/__init__.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/__init__.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/crawler.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/item.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/logging.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/middlewares.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/pipelines.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/settings.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/spiders/__init__.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/spiders/sitemap.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/utils.py +1 -0
- data_prep_connector-0.2.4.dev1/test/dpk_connector/core/__init__.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_crawler.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_middlewares.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_sitemap_spider.py +1 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_utils.py +1 -0
- data_prep_connector-0.2.3.dev0/test/dpk_connector/core/__init__.py +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/Makefile +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/README.md +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/setup.cfg +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/SOURCES.txt +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/dependency_links.txt +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/top_level.txt +0 -0
- {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_sitemap_spider/index.html +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: data_prep_connector
|
|
3
|
-
Version: 0.2.3.dev0
|
|
3
|
+
Version: 0.2.4.dev1
|
|
4
4
|
Summary: Scalable and Compliant Web Crawler
|
|
5
5
|
Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
|
|
8
|
-
Requires-Python: <3.
|
|
8
|
+
Requires-Python: <3.14,>=3.10
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
|
-
Requires-Dist: scrapy
|
|
10
|
+
Requires-Dist: scrapy==2.12.0
|
|
11
11
|
Requires-Dist: pydantic>=2.8.1
|
|
12
12
|
Requires-Dist: tldextract>=5.1.2
|
|
13
13
|
Provides-Extra: dev
|
|
@@ -21,7 +21,7 @@ pip install data-prep-connector
|
|
|
21
21
|
### From Github
|
|
22
22
|
|
|
23
23
|
```sh
|
|
24
|
-
pip install git+https://github.com/
|
|
24
|
+
pip install git+https://github.com/data-prep-kit/data-prep-kit.git@dev#subdirectory=data-connector-lib
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
## Example usage
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "data_prep_connector"
|
|
3
|
-
version = "0.2.3.dev0"
|
|
4
|
-
requires-python = ">=3.10,<3.
|
|
3
|
+
version = "0.2.4.dev1"
|
|
4
|
+
requires-python = ">=3.10,<3.14"
|
|
5
5
|
keywords = [
|
|
6
6
|
"data",
|
|
7
7
|
"data acquisition",
|
|
@@ -18,18 +18,21 @@ license = { text = "Apache-2.0" }
|
|
|
18
18
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
19
19
|
authors = [{ name = "Hiroya Matsubara", email = "hmtbr@jp.ibm.com" }]
|
|
20
20
|
dependencies = [
|
|
21
|
-
|
|
21
|
+
# Lock to 2.12.0, as later versions have different requirements:
|
|
22
|
+
#ERROR test/dpk_connector/core/test_sitemap_spider.py::test_parse -
|
|
23
|
+
# RuntimeError: The installed reactor (twisted.internet.epollreactor.EPollReactor) does not match the requested one (twisted.internet.asyncioreactor.AsyncioSelectorReactor)
|
|
24
|
+
"scrapy==2.12.0",
|
|
22
25
|
"pydantic>=2.8.1",
|
|
23
26
|
"tldextract>=5.1.2",
|
|
24
27
|
]
|
|
25
28
|
|
|
26
29
|
[project_urls]
|
|
27
|
-
Repository = "https://github.com/
|
|
28
|
-
Issues = "https://github.com/
|
|
29
|
-
Documentation = "https://
|
|
30
|
+
Repository = "https://github.com/data-prep-kit/data-prep-kit"
|
|
31
|
+
Issues = "https://github.com/data-prep-kit/data-prep-kit/issues"
|
|
32
|
+
Documentation = "https://data-prep-kit.github.io/data-prep-kit/"
|
|
30
33
|
|
|
31
34
|
[build-system]
|
|
32
|
-
requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
|
|
35
|
+
requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0,<=8.3.0"]
|
|
33
36
|
build-backend = "setuptools.build_meta"
|
|
34
37
|
|
|
35
38
|
[project.optional-dependencies]
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: data_prep_connector
|
|
3
|
-
Version: 0.2.3.dev0
|
|
3
|
+
Version: 0.2.4.dev1
|
|
4
4
|
Summary: Scalable and Compliant Web Crawler
|
|
5
5
|
Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
|
|
8
|
-
Requires-Python: <3.
|
|
8
|
+
Requires-Python: <3.14,>=3.10
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
|
-
Requires-Dist: scrapy
|
|
10
|
+
Requires-Dist: scrapy==2.12.0
|
|
11
11
|
Requires-Dist: pydantic>=2.8.1
|
|
12
12
|
Requires-Dist: tldextract>=5.1.2
|
|
13
13
|
Provides-Extra: dev
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|