data-prep-connector 0.2.3.dev0__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/Makefile +3 -2
  2. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/PKG-INFO +4 -4
  3. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/doc/overview.md +2 -2
  4. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/pyproject.toml +10 -7
  5. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/data_prep_connector.egg-info/PKG-INFO +4 -4
  6. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/data_prep_connector.egg-info/requires.txt +1 -1
  7. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/__init__.py +1 -0
  8. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/__init__.py +1 -0
  9. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/crawler.py +1 -0
  10. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/item.py +1 -0
  11. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/logging.py +1 -0
  12. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/middlewares.py +1 -0
  13. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/pipelines.py +1 -0
  14. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/settings.py +1 -0
  15. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/spiders/__init__.py +1 -0
  16. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/spiders/sitemap.py +1 -0
  17. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/dpk_connector/core/utils.py +1 -0
  18. data_prep_connector-0.2.4/test/dpk_connector/core/__init__.py +1 -0
  19. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/test/dpk_connector/core/test_crawler.py +1 -0
  20. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/test/dpk_connector/core/test_middlewares.py +1 -0
  21. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/test/dpk_connector/core/test_sitemap_spider.py +1 -0
  22. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/test/dpk_connector/core/test_utils.py +1 -0
  23. data_prep_connector-0.2.3.dev0/test/dpk_connector/core/__init__.py +0 -0
  24. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/README.md +0 -0
  25. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/setup.cfg +0 -0
  26. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/data_prep_connector.egg-info/SOURCES.txt +0 -0
  27. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/data_prep_connector.egg-info/dependency_links.txt +0 -0
  28. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/src/data_prep_connector.egg-info/top_level.txt +0 -0
  29. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4}/test/dpk_connector/core/test_sitemap_spider/index.html +0 -0
@@ -32,8 +32,9 @@ venv:: pyproject.toml
32
32
  $(PYTHON) -m venv venv
33
33
  source venv/bin/activate; \
34
34
  pip install --upgrade pip; \
35
- pip install -e .; \
36
- pip install pytest pytest-mock pytest-datadir pytest-cov moto==5.0.5 markupsafe==2.0.1
35
+ pip install uv; \
36
+ uv pip install -e .; \
37
+ uv pip install pytest pytest-mock pytest-datadir pytest-cov moto==5.0.5 markupsafe==2.0.1
37
38
 
38
39
  image::
39
40
  @# Help: Placeholder does nothing for now.
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: data_prep_connector
3
- Version: 0.2.3.dev0
3
+ Version: 0.2.4
4
4
  Summary: Scalable and Compliant Web Crawler
5
5
  Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
6
6
  License: Apache-2.0
7
7
  Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
8
- Requires-Python: <3.13,>=3.10
8
+ Requires-Python: <3.14,>=3.10
9
9
  Description-Content-Type: text/markdown
10
- Requires-Dist: scrapy>=2.11.2
10
+ Requires-Dist: scrapy==2.12.0
11
11
  Requires-Dist: pydantic>=2.8.1
12
12
  Requires-Dist: tldextract>=5.1.2
13
13
  Provides-Extra: dev
@@ -15,13 +15,13 @@ Features:
15
15
  ### From PyPI
16
16
 
17
17
  ```sh
18
- pip install data-prep-connector
18
+ uv pip install data-prep-connector
19
19
  ```
20
20
 
21
21
  ### From Github
22
22
 
23
23
  ```sh
24
- pip install git+https://github.com/IBM/data-prep-kit.git@dev#subdirectory=data-connector-lib
24
+ uv pip install git+https://github.com/data-prep-kit/data-prep-kit.git@dev#subdirectory=data-connector-lib
25
25
  ```
26
26
 
27
27
  ## Example usage
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "data_prep_connector"
3
- version = "0.2.3.dev0"
4
- requires-python = ">=3.10,<3.13"
3
+ version = "0.2.4"
4
+ requires-python = ">=3.10,<3.14"
5
5
  keywords = [
6
6
  "data",
7
7
  "data acquisition",
@@ -18,18 +18,21 @@ license = { text = "Apache-2.0" }
18
18
  readme = { file = "README.md", content-type = "text/markdown" }
19
19
  authors = [{ name = "Hiroya Matsubara", email = "hmtbr@jp.ibm.com" }]
20
20
  dependencies = [
21
- "scrapy>=2.11.2",
21
+ #lock to 2.12.0 as later versions have different requirements
22
+ #ERROR test/dpk_connector/core/test_sitemap_spider.py::test_parse -
23
+ # RuntimeError: The installed reactor (twisted.internet.epollreactor.EPollReactor) does not match the requested one (twisted.internet.asyncioreactor.AsyncioSelectorReactor)
24
+ "scrapy==2.12.0",
22
25
  "pydantic>=2.8.1",
23
26
  "tldextract>=5.1.2",
24
27
  ]
25
28
 
26
29
  [project_urls]
27
- Repository = "https://github.com/IBM/data-prep-kit"
28
- Issues = "https://github.com/IBM/data-prep-kit/issues"
29
- Documentation = "https://ibm.github.io/data-prep-kit/"
30
+ Repository = "https://github.com/data-prep-kit/data-prep-kit"
31
+ Issues = "https://github.com/data-prep-kit/data-prep-kit/issues"
32
+ Documentation = "https://data-prep-kit.github.io/data-prep-kit/"
30
33
 
31
34
  [build-system]
32
- requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
35
+ requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0,<=8.3.0"]
33
36
  build-backend = "setuptools.build_meta"
34
37
 
35
38
  [project.optional-dependencies]
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: data_prep_connector
3
- Version: 0.2.3.dev0
3
+ Version: 0.2.4
4
4
  Summary: Scalable and Compliant Web Crawler
5
5
  Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
6
6
  License: Apache-2.0
7
7
  Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
8
- Requires-Python: <3.13,>=3.10
8
+ Requires-Python: <3.14,>=3.10
9
9
  Description-Content-Type: text/markdown
10
- Requires-Dist: scrapy>=2.11.2
10
+ Requires-Dist: scrapy==2.12.0
11
11
  Requires-Dist: pydantic>=2.8.1
12
12
  Requires-Dist: tldextract>=5.1.2
13
13
  Provides-Extra: dev
@@ -1,4 +1,4 @@
1
- scrapy>=2.11.2
1
+ scrapy==2.12.0
2
2
  pydantic>=2.8.1
3
3
  tldextract>=5.1.2
4
4
 
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -0,0 +1 @@
1
+ # SPDX-License-Identifier: Apache-2.0
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.