data-prep-connector 0.2.3.dev0__tar.gz → 0.2.4.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/PKG-INFO +4 -4
  2. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/doc/overview.md +1 -1
  3. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/pyproject.toml +10 -7
  4. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/PKG-INFO +4 -4
  5. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/requires.txt +1 -1
  6. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/__init__.py +1 -0
  7. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/__init__.py +1 -0
  8. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/crawler.py +1 -0
  9. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/item.py +1 -0
  10. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/logging.py +1 -0
  11. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/middlewares.py +1 -0
  12. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/pipelines.py +1 -0
  13. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/settings.py +1 -0
  14. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/spiders/__init__.py +1 -0
  15. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/spiders/sitemap.py +1 -0
  16. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/dpk_connector/core/utils.py +1 -0
  17. data_prep_connector-0.2.4.dev1/test/dpk_connector/core/__init__.py +1 -0
  18. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_crawler.py +1 -0
  19. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_middlewares.py +1 -0
  20. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_sitemap_spider.py +1 -0
  21. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_utils.py +1 -0
  22. data_prep_connector-0.2.3.dev0/test/dpk_connector/core/__init__.py +0 -0
  23. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/Makefile +0 -0
  24. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/README.md +0 -0
  25. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/setup.cfg +0 -0
  26. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/SOURCES.txt +0 -0
  27. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/dependency_links.txt +0 -0
  28. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/src/data_prep_connector.egg-info/top_level.txt +0 -0
  29. {data_prep_connector-0.2.3.dev0 → data_prep_connector-0.2.4.dev1}/test/dpk_connector/core/test_sitemap_spider/index.html +0 -0
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: data_prep_connector
3
- Version: 0.2.3.dev0
3
+ Version: 0.2.4.dev1
4
4
  Summary: Scalable and Compliant Web Crawler
5
5
  Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
6
6
  License: Apache-2.0
7
7
  Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
8
- Requires-Python: <3.13,>=3.10
8
+ Requires-Python: <3.14,>=3.10
9
9
  Description-Content-Type: text/markdown
10
- Requires-Dist: scrapy>=2.11.2
10
+ Requires-Dist: scrapy==2.12.0
11
11
  Requires-Dist: pydantic>=2.8.1
12
12
  Requires-Dist: tldextract>=5.1.2
13
13
  Provides-Extra: dev
@@ -21,7 +21,7 @@ pip install data-prep-connector
21
21
  ### From Github
22
22
 
23
23
  ```sh
24
- pip install git+https://github.com/IBM/data-prep-kit.git@dev#subdirectory=data-connector-lib
24
+ pip install git+https://github.com/data-prep-kit/data-prep-kit.git@dev#subdirectory=data-connector-lib
25
25
  ```
26
26
 
27
27
  ## Example usage
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "data_prep_connector"
3
- version = "0.2.3.dev0"
4
- requires-python = ">=3.10,<3.13"
3
+ version = "0.2.4.dev1"
4
+ requires-python = ">=3.10,<3.14"
5
5
  keywords = [
6
6
  "data",
7
7
  "data acquisition",
@@ -18,18 +18,21 @@ license = { text = "Apache-2.0" }
18
18
  readme = { file = "README.md", content-type = "text/markdown" }
19
19
  authors = [{ name = "Hiroya Matsubara", email = "hmtbr@jp.ibm.com" }]
20
20
  dependencies = [
21
- "scrapy>=2.11.2",
21
+ # Lock to 2.12.0 because later versions have different requirements
22
+ #ERROR test/dpk_connector/core/test_sitemap_spider.py::test_parse -
23
+ # RuntimeError: The installed reactor (twisted.internet.epollreactor.EPollReactor) does not match the requested one (twisted.internet.asyncioreactor.AsyncioSelectorReactor)
24
+ "scrapy==2.12.0",
22
25
  "pydantic>=2.8.1",
23
26
  "tldextract>=5.1.2",
24
27
  ]
25
28
 
26
29
  [project_urls]
27
- Repository = "https://github.com/IBM/data-prep-kit"
28
- Issues = "https://github.com/IBM/data-prep-kit/issues"
29
- Documentation = "https://ibm.github.io/data-prep-kit/"
30
+ Repository = "https://github.com/data-prep-kit/data-prep-kit"
31
+ Issues = "https://github.com/data-prep-kit/data-prep-kit/issues"
32
+ Documentation = "https://data-prep-kit.github.io/data-prep-kit/"
30
33
 
31
34
  [build-system]
32
- requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0"]
35
+ requires = ["setuptools>=68.0.0", "wheel", "setuptools_scm[toml]>=7.1.0,<=8.3.0"]
33
36
  build-backend = "setuptools.build_meta"
34
37
 
35
38
  [project.optional-dependencies]
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: data_prep_connector
3
- Version: 0.2.3.dev0
3
+ Version: 0.2.4.dev1
4
4
  Summary: Scalable and Compliant Web Crawler
5
5
  Author-email: Hiroya Matsubara <hmtbr@jp.ibm.com>
6
6
  License: Apache-2.0
7
7
  Keywords: data,data acquisition,crawler,web crawler,llm,generative,ai,fine-tuning,llmapps
8
- Requires-Python: <3.13,>=3.10
8
+ Requires-Python: <3.14,>=3.10
9
9
  Description-Content-Type: text/markdown
10
- Requires-Dist: scrapy>=2.11.2
10
+ Requires-Dist: scrapy==2.12.0
11
11
  Requires-Dist: pydantic>=2.8.1
12
12
  Requires-Dist: tldextract>=5.1.2
13
13
  Provides-Extra: dev
@@ -1,4 +1,4 @@
1
- scrapy>=2.11.2
1
+ scrapy==2.12.0
2
2
  pydantic>=2.8.1
3
3
  tldextract>=5.1.2
4
4
 
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -0,0 +1 @@
1
+ # SPDX-License-Identifier: Apache-2.0
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.
@@ -1,3 +1,4 @@
1
+ # SPDX-License-Identifier: Apache-2.0
1
2
  # (C) Copyright IBM Corp. 2024.
2
3
  # Licensed under the Apache License, Version 2.0 (the “License”);
3
4
  # you may not use this file except in compliance with the License.