phoenix-engine 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. phoenix_engine-0.1.0/PKG-INFO +187 -0
  2. phoenix_engine-0.1.0/README.md +134 -0
  3. phoenix_engine-0.1.0/pyproject.toml +184 -0
  4. phoenix_engine-0.1.0/setup.cfg +4 -0
  5. phoenix_engine-0.1.0/src/phoenix/__init__.py +41 -0
  6. phoenix_engine-0.1.0/src/phoenix/__main__.py +8 -0
  7. phoenix_engine-0.1.0/src/phoenix/adapters/__init__.py +25 -0
  8. phoenix_engine-0.1.0/src/phoenix/adapters/base.py +230 -0
  9. phoenix_engine-0.1.0/src/phoenix/adapters/facebook.py +482 -0
  10. phoenix_engine-0.1.0/src/phoenix/adapters/generated/__init__.py +0 -0
  11. phoenix_engine-0.1.0/src/phoenix/adapters/generated/quotes_to_scrape.py +76 -0
  12. phoenix_engine-0.1.0/src/phoenix/adapters/generic.py +189 -0
  13. phoenix_engine-0.1.0/src/phoenix/adapters/instagram.py +426 -0
  14. phoenix_engine-0.1.0/src/phoenix/adapters/linkedin.py +542 -0
  15. phoenix_engine-0.1.0/src/phoenix/adapters/tiktok.py +557 -0
  16. phoenix_engine-0.1.0/src/phoenix/adapters/x_twitter.py +401 -0
  17. phoenix_engine-0.1.0/src/phoenix/adapters/youtube.py +544 -0
  18. phoenix_engine-0.1.0/src/phoenix/architect/__init__.py +15 -0
  19. phoenix_engine-0.1.0/src/phoenix/architect/coder.py +150 -0
  20. phoenix_engine-0.1.0/src/phoenix/architect/critic.py +324 -0
  21. phoenix_engine-0.1.0/src/phoenix/architect/explorer.py +232 -0
  22. phoenix_engine-0.1.0/src/phoenix/architect/fixture_generator.py +256 -0
  23. phoenix_engine-0.1.0/src/phoenix/architect/inspector.py +111 -0
  24. phoenix_engine-0.1.0/src/phoenix/architect/orchestrator.py +403 -0
  25. phoenix_engine-0.1.0/src/phoenix/architect/researcher.py +187 -0
  26. phoenix_engine-0.1.0/src/phoenix/architect/template_generator.py +145 -0
  27. phoenix_engine-0.1.0/src/phoenix/architect/writer.py +108 -0
  28. phoenix_engine-0.1.0/src/phoenix/cli/__init__.py +7 -0
  29. phoenix_engine-0.1.0/src/phoenix/cli/main.py +725 -0
  30. phoenix_engine-0.1.0/src/phoenix/collectors/__init__.py +17 -0
  31. phoenix_engine-0.1.0/src/phoenix/collectors/base.py +81 -0
  32. phoenix_engine-0.1.0/src/phoenix/collectors/browser.py +209 -0
  33. phoenix_engine-0.1.0/src/phoenix/collectors/browser_pool.py +197 -0
  34. phoenix_engine-0.1.0/src/phoenix/collectors/direct.py +132 -0
  35. phoenix_engine-0.1.0/src/phoenix/engine.py +257 -0
  36. phoenix_engine-0.1.0/src/phoenix/exceptions.py +77 -0
  37. phoenix_engine-0.1.0/src/phoenix/infrastructure/__init__.py +40 -0
  38. phoenix_engine-0.1.0/src/phoenix/infrastructure/audit_logger.py +68 -0
  39. phoenix_engine-0.1.0/src/phoenix/infrastructure/config.py +134 -0
  40. phoenix_engine-0.1.0/src/phoenix/infrastructure/license_manager.py +270 -0
  41. phoenix_engine-0.1.0/src/phoenix/infrastructure/rate_limiter.py +197 -0
  42. phoenix_engine-0.1.0/src/phoenix/infrastructure/session_manager.py +92 -0
  43. phoenix_engine-0.1.0/src/phoenix/infrastructure/storage.py +580 -0
  44. phoenix_engine-0.1.0/src/phoenix/infrastructure/vault.py +275 -0
  45. phoenix_engine-0.1.0/src/phoenix/intelligence/__init__.py +18 -0
  46. phoenix_engine-0.1.0/src/phoenix/intelligence/anti_bot_recovery.py +205 -0
  47. phoenix_engine-0.1.0/src/phoenix/intelligence/change_detector.py +314 -0
  48. phoenix_engine-0.1.0/src/phoenix/intelligence/classifier.py +179 -0
  49. phoenix_engine-0.1.0/src/phoenix/intelligence/entities.py +104 -0
  50. phoenix_engine-0.1.0/src/phoenix/intelligence/selector_health.py +139 -0
  51. phoenix_engine-0.1.0/src/phoenix/intelligence/selector_repair.py +35 -0
  52. phoenix_engine-0.1.0/src/phoenix/models/__init__.py +34 -0
  53. phoenix_engine-0.1.0/src/phoenix/models/classification.py +19 -0
  54. phoenix_engine-0.1.0/src/phoenix/models/config.py +207 -0
  55. phoenix_engine-0.1.0/src/phoenix/models/document.py +70 -0
  56. phoenix_engine-0.1.0/src/phoenix/models/output.py +182 -0
  57. phoenix_engine-0.1.0/src/phoenix/models/session.py +26 -0
  58. phoenix_engine-0.1.0/src/phoenix/models/strategy.py +28 -0
  59. phoenix_engine-0.1.0/src/phoenix/options.py +67 -0
  60. phoenix_engine-0.1.0/src/phoenix/pipeline.py +598 -0
  61. phoenix_engine-0.1.0/src/phoenix/plugins/__init__.py +9 -0
  62. phoenix_engine-0.1.0/src/phoenix/plugins/loader.py +266 -0
  63. phoenix_engine-0.1.0/src/phoenix/plugins/manifest.py +62 -0
  64. phoenix_engine-0.1.0/src/phoenix/plugins/registry.py +109 -0
  65. phoenix_engine-0.1.0/src/phoenix/processing/__init__.py +15 -0
  66. phoenix_engine-0.1.0/src/phoenix/processing/ai_assistant.py +101 -0
  67. phoenix_engine-0.1.0/src/phoenix/processing/archiver.py +124 -0
  68. phoenix_engine-0.1.0/src/phoenix/processing/domain_memory.py +304 -0
  69. phoenix_engine-0.1.0/src/phoenix/processing/html_extractor.py +79 -0
  70. phoenix_engine-0.1.0/src/phoenix/processing/normalizer.py +124 -0
  71. phoenix_engine-0.1.0/src/phoenix/processing/phoenix_ai_extractor.py +436 -0
  72. phoenix_engine-0.1.0/src/phoenix/py.typed +0 -0
  73. phoenix_engine-0.1.0/src/phoenix/router.py +304 -0
  74. phoenix_engine-0.1.0/src/phoenix/scrapers/__init__.py +33 -0
  75. phoenix_engine-0.1.0/src/phoenix/scrapers/base.py +13 -0
  76. phoenix_engine-0.1.0/src/phoenix/scrapers/browser.py +9 -0
  77. phoenix_engine-0.1.0/src/phoenix/scrapers/http.py +9 -0
  78. phoenix_engine-0.1.0/src/phoenix/scrapers/selector_engine.py +38 -0
  79. phoenix_engine-0.1.0/src/phoenix/stealth/__init__.py +21 -0
  80. phoenix_engine-0.1.0/src/phoenix/stealth/captcha.py +143 -0
  81. phoenix_engine-0.1.0/src/phoenix/stealth/humanizer.py +101 -0
  82. phoenix_engine-0.1.0/src/phoenix/stealth/profile.py +134 -0
  83. phoenix_engine-0.1.0/src/phoenix/stealth/rotator.py +87 -0
  84. phoenix_engine-0.1.0/src/phoenix/stealth/warming.py +56 -0
  85. phoenix_engine-0.1.0/src/phoenix/strategy_selector.py +145 -0
  86. phoenix_engine-0.1.0/src/phoenix/version.py +7 -0
  87. phoenix_engine-0.1.0/src/phoenix_engine.egg-info/PKG-INFO +187 -0
  88. phoenix_engine-0.1.0/src/phoenix_engine.egg-info/SOURCES.txt +90 -0
  89. phoenix_engine-0.1.0/src/phoenix_engine.egg-info/dependency_links.txt +1 -0
  90. phoenix_engine-0.1.0/src/phoenix_engine.egg-info/entry_points.txt +2 -0
  91. phoenix_engine-0.1.0/src/phoenix_engine.egg-info/requires.txt +32 -0
  92. phoenix_engine-0.1.0/src/phoenix_engine.egg-info/top_level.txt +1 -0
@@ -0,0 +1,187 @@
1
+ Metadata-Version: 2.4
2
+ Name: phoenix-engine
3
+ Version: 0.1.0
4
+ Summary: Universal pure web scraping engine.
5
+ Author: Phoenix Engine Team
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/phnx-tech/phoenix-engine
8
+ Project-URL: Documentation, https://github.com/phnx-tech/phoenix-engine#readme
9
+ Project-URL: Repository, https://github.com/phnx-tech/phoenix-engine.git
10
+ Project-URL: Issues, https://github.com/phnx-tech/phoenix-engine/issues
11
+ Keywords: scraping,web,social-media,html,automation
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Internet :: WWW/HTTP
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: httpx>=0.27.0
23
+ Requires-Dist: playwright>=1.40.0
24
+ Requires-Dist: beautifulsoup4>=4.12.0
25
+ Requires-Dist: lxml>=5.0.0
26
+ Requires-Dist: cssselect>=1.2.0
27
+ Requires-Dist: pydantic>=2.0.0
28
+ Requires-Dist: pydantic-settings>=2.0.0
29
+ Requires-Dist: typer>=0.12.0
30
+ Requires-Dist: sqlalchemy>=2.0.0
31
+ Requires-Dist: alembic>=1.13.0
32
+ Requires-Dist: keyring>=24.0.0
33
+ Requires-Dist: cryptography>=42.0.0
34
+ Requires-Dist: structlog>=24.0.0
35
+ Requires-Dist: openai>=1.0.0
36
+ Requires-Dist: duckduckgo-search>=8.0.0
37
+ Requires-Dist: pyyaml>=6.0
38
+ Provides-Extra: dev
39
+ Requires-Dist: build>=1.0.0; extra == "dev"
40
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
41
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
42
+ Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
43
+ Requires-Dist: respx>=0.21.0; extra == "dev"
44
+ Requires-Dist: pytest-playwright>=0.5.0; extra == "dev"
45
+ Requires-Dist: black>=24.0.0; extra == "dev"
46
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
47
+ Requires-Dist: mypy>=1.10.0; extra == "dev"
48
+ Requires-Dist: pre-commit>=3.7.0; extra == "dev"
49
+ Requires-Dist: faker>=25.0.0; extra == "dev"
50
+ Requires-Dist: factory-boy>=3.3.0; extra == "dev"
51
+ Requires-Dist: mkdocs>=1.6.0; extra == "dev"
52
+ Requires-Dist: mkdocs-material>=9.5.0; extra == "dev"
53
+
54
+ # Phoenix Engine
55
+
56
+ A universal pure-web scraping engine that turns public web pages into structured,
57
+ predictable data. No official APIs required — Phoenix Engine uses raw HTTP
58
+ requests and headless browser automation to extract posts, profiles, listings,
59
+ and articles from social platforms and websites.
60
+
61
+ > **Current status:** beta / early access. A license key is required to run the
62
+ > engine in production.
63
+
64
+ ## What it does
65
+
66
+ - Scrapes public pages using **HTTP** or **headless browser** strategies.
67
+ - Returns a **unified JSON output** no matter what platform you target.
68
+ - Automatically adapts to site changes, anti-bot measures, and selector drift.
69
+ - Learns from past scrapes to pick the best strategy for each domain.
70
+ - Can be used from the command line or inside your Python application.
71
+
72
+ ## Install
73
+
74
+ ### From PyPI
75
+
76
+ ```bash
77
+ pip install phoenix-engine
78
+ ```
79
+
80
+ ### From a GitHub Release wheel
81
+
82
+ Download the `.whl` from the latest release, then:
83
+
84
+ ```bash
85
+ pip install phoenix_engine-0.1.0-py3-none-any.whl
86
+ ```
87
+
88
+ ## Activate your license
89
+
90
+ Phoenix Engine is distributed under license keys during beta. After installing,
91
+ set your key:
92
+
93
+ ```bash
94
+ export PHOENIX_LICENSE_ENFORCEMENT_ENABLED=true
95
+ export PHOENIX_LICENSE_SECRET="your-signing-secret"
96
+ export PHOENIX_LICENSE_KEY="phx.eyJ..."
97
+ ```
98
+
99
+ Or create a `phoenix.yaml` file:
100
+
101
+ ```yaml
102
+ license_enforcement_enabled: true
103
+ license_secret: "your-signing-secret"
104
+ license_key: "phx.eyJ..."
105
+ ```
106
+
107
+ If the key is missing, expired, tampered with, or over its use limit, the
108
+ engine will refuse to start.
109
+
110
+ ## Quick start — CLI
111
+
112
+ ```bash
113
+ # Scrape a single public page
114
+ phoenix scrape "https://example.com/post/123"
115
+
116
+ # Scrape without archiving the raw source
117
+ phoenix scrape "https://example.com/post/123" --no-archive
118
+
119
+ # Scrape multiple URLs in parallel
120
+ phoenix scrape-batch \
121
+ "https://example.com/post/123" \
122
+ "https://example.com/post/456" \
123
+ --output results.json
124
+
125
+ # List built-in platform adapters
126
+ phoenix plugins list
127
+
128
+ # Inspect effective configuration (secrets are masked)
129
+ phoenix config show
130
+ ```
131
+
132
+ ## Quick start — Python library
133
+
134
+ ```python
135
+ import asyncio
136
+ from phoenix import PhoenixEngine
137
+
138
+ async def main() -> None:
139
+     async with PhoenixEngine() as engine:
140
+         result = await engine.scrape("https://example.com/post/123")
141
+         print(result.output.model_dump_json(indent=2))
142
+
143
+ asyncio.run(main())
144
+ ```
145
+
146
+ ## Configuration
147
+
148
+ Most settings can be controlled with environment variables or a config file
149
+ (`phoenix.yaml`, `phoenix.json`, `phoenix.toml`):
150
+
151
+ ```yaml
152
+ timeout: 30
153
+ stealth_enabled: true
154
+ ai_enabled: false
155
+ rate_limits:
156
+   example.com: 1.0
157
+ ```
158
+
159
+ Run `phoenix config show` to see the active configuration.
160
+
161
+ ## Supported platforms
162
+
163
+ Phoenix Engine ships with adapters for common public platforms and a generic
164
+ fallback for any HTML page:
165
+
166
+ - Instagram, Facebook, X/Twitter, LinkedIn, TikTok, YouTube
167
+ - Generic blogs, listings, and article pages
168
+
169
+ Adapters are plugin-based, so new platforms can be added without touching the
170
+ core engine.
171
+
172
+ ## Ethical use
173
+
174
+ Phoenix Engine only scrapes **publicly available** content. Always respect:
175
+
176
+ - The target site's `robots.txt` and Terms of Service.
177
+ - Local laws and data-protection regulations (GDPR, CCPA, etc.).
178
+ - Rate limits — the engine includes built-in throttling to avoid overload.
179
+
180
+ ## Support
181
+
182
+ - Issues: https://github.com/phnx-tech/phoenix-engine/issues
183
+ - Repository: https://github.com/phnx-tech/phoenix-engine
184
+
185
+ ## License
186
+
187
+ Commercial beta license. See your license agreement for terms.
@@ -0,0 +1,134 @@
1
+ # Phoenix Engine
2
+
3
+ A universal pure-web scraping engine that turns public web pages into structured,
4
+ predictable data. No official APIs required — Phoenix Engine uses raw HTTP
5
+ requests and headless browser automation to extract posts, profiles, listings,
6
+ and articles from social platforms and websites.
7
+
8
+ > **Current status:** beta / early access. A license key is required to run the
9
+ > engine in production.
10
+
11
+ ## What it does
12
+
13
+ - Scrapes public pages using **HTTP** or **headless browser** strategies.
14
+ - Returns a **unified JSON output** no matter what platform you target.
15
+ - Automatically adapts to site changes, anti-bot measures, and selector drift.
16
+ - Learns from past scrapes to pick the best strategy for each domain.
17
+ - Can be used from the command line or inside your Python application.
18
+
19
+ ## Install
20
+
21
+ ### From PyPI
22
+
23
+ ```bash
24
+ pip install phoenix-engine
25
+ ```
26
+
27
+ ### From a GitHub Release wheel
28
+
29
+ Download the `.whl` from the latest release, then:
30
+
31
+ ```bash
32
+ pip install phoenix_engine-0.1.0-py3-none-any.whl
33
+ ```
34
+
35
+ ## Activate your license
36
+
37
+ Phoenix Engine is distributed under license keys during beta. After installing,
38
+ set your key:
39
+
40
+ ```bash
41
+ export PHOENIX_LICENSE_ENFORCEMENT_ENABLED=true
42
+ export PHOENIX_LICENSE_SECRET="your-signing-secret"
43
+ export PHOENIX_LICENSE_KEY="phx.eyJ..."
44
+ ```
45
+
46
+ Or create a `phoenix.yaml` file:
47
+
48
+ ```yaml
49
+ license_enforcement_enabled: true
50
+ license_secret: "your-signing-secret"
51
+ license_key: "phx.eyJ..."
52
+ ```
53
+
54
+ If the key is missing, expired, tampered with, or over its use limit, the
55
+ engine will refuse to start.
56
+
57
+ ## Quick start — CLI
58
+
59
+ ```bash
60
+ # Scrape a single public page
61
+ phoenix scrape "https://example.com/post/123"
62
+
63
+ # Scrape without archiving the raw source
64
+ phoenix scrape "https://example.com/post/123" --no-archive
65
+
66
+ # Scrape multiple URLs in parallel
67
+ phoenix scrape-batch \
68
+ "https://example.com/post/123" \
69
+ "https://example.com/post/456" \
70
+ --output results.json
71
+
72
+ # List built-in platform adapters
73
+ phoenix plugins list
74
+
75
+ # Inspect effective configuration (secrets are masked)
76
+ phoenix config show
77
+ ```
78
+
79
+ ## Quick start — Python library
80
+
81
+ ```python
82
+ import asyncio
83
+ from phoenix import PhoenixEngine
84
+
85
+ async def main() -> None:
86
+     async with PhoenixEngine() as engine:
87
+         result = await engine.scrape("https://example.com/post/123")
88
+         print(result.output.model_dump_json(indent=2))
89
+
90
+ asyncio.run(main())
91
+ ```
92
+
93
+ ## Configuration
94
+
95
+ Most settings can be controlled with environment variables or a config file
96
+ (`phoenix.yaml`, `phoenix.json`, `phoenix.toml`):
97
+
98
+ ```yaml
99
+ timeout: 30
100
+ stealth_enabled: true
101
+ ai_enabled: false
102
+ rate_limits:
103
+   example.com: 1.0
104
+ ```
105
+
106
+ Run `phoenix config show` to see the active configuration.
107
+
108
+ ## Supported platforms
109
+
110
+ Phoenix Engine ships with adapters for common public platforms and a generic
111
+ fallback for any HTML page:
112
+
113
+ - Instagram, Facebook, X/Twitter, LinkedIn, TikTok, YouTube
114
+ - Generic blogs, listings, and article pages
115
+
116
+ Adapters are plugin-based, so new platforms can be added without touching the
117
+ core engine.
118
+
119
+ ## Ethical use
120
+
121
+ Phoenix Engine only scrapes **publicly available** content. Always respect:
122
+
123
+ - The target site's `robots.txt` and Terms of Service.
124
+ - Local laws and data-protection regulations (GDPR, CCPA, etc.).
125
+ - Rate limits — the engine includes built-in throttling to avoid overload.
126
+
127
+ ## Support
128
+
129
+ - Issues: https://github.com/phnx-tech/phoenix-engine/issues
130
+ - Repository: https://github.com/phnx-tech/phoenix-engine
131
+
132
+ ## License
133
+
134
+ Commercial beta license. See your license agreement for terms.
@@ -0,0 +1,184 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "phoenix-engine"
7
+ version = "0.1.0"
8
+ description = "Universal pure web scraping engine."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "Phoenix Engine Team" },
14
+ ]
15
+ keywords = ["scraping", "web", "social-media", "html", "automation"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Internet :: WWW/HTTP",
24
+ "Topic :: Software Development :: Libraries :: Python Modules",
25
+ ]
26
+ dependencies = [
27
+ "httpx>=0.27.0",
28
+ "playwright>=1.40.0",
29
+ "beautifulsoup4>=4.12.0",
30
+ "lxml>=5.0.0",
31
+ "cssselect>=1.2.0",
32
+ "pydantic>=2.0.0",
33
+ "pydantic-settings>=2.0.0",
34
+ "typer>=0.12.0",
35
+ "sqlalchemy>=2.0.0",
36
+ "alembic>=1.13.0",
37
+ "keyring>=24.0.0",
38
+ "cryptography>=42.0.0",
39
+ "structlog>=24.0.0",
40
+ "openai>=1.0.0",
41
+ "duckduckgo-search>=8.0.0",
42
+ "pyyaml>=6.0",
43
+ ]
44
+
45
+ [project.optional-dependencies]
46
+ dev = [
47
+ "build>=1.0.0",
48
+ "pytest>=8.0.0",
49
+ "pytest-asyncio>=0.23.0",
50
+ "pytest-cov>=5.0.0",
51
+ "respx>=0.21.0",
52
+ "pytest-playwright>=0.5.0",
53
+ "black>=24.0.0",
54
+ "ruff>=0.4.0",
55
+ "mypy>=1.10.0",
56
+ "pre-commit>=3.7.0",
57
+ "faker>=25.0.0",
58
+ "factory-boy>=3.3.0",
59
+ "mkdocs>=1.6.0",
60
+ "mkdocs-material>=9.5.0",
61
+ ]
62
+
63
+ [project.scripts]
64
+ phoenix = "phoenix.cli.main:app"
65
+
66
+ [project.urls]
67
+ Homepage = "https://github.com/phnx-tech/phoenix-engine"
68
+ Documentation = "https://github.com/phnx-tech/phoenix-engine#readme"
69
+ Repository = "https://github.com/phnx-tech/phoenix-engine.git"
70
+ Issues = "https://github.com/phnx-tech/phoenix-engine/issues"
71
+
72
+ [tool.setuptools.packages.find]
73
+ where = ["src"]
74
+
75
+ [tool.black]
76
+ line-length = 100
77
+ target-version = ["py311", "py312", "py313"]
78
+ include = "\\.pyi?$"
79
+ extend-exclude = """
80
+ /(
81
+ migrations
82
+ | archive
83
+ | \\.venv
84
+ )/
85
+ """
86
+
87
+ [tool.ruff]
88
+ target-version = "py311"
89
+ line-length = 100
90
+
91
+ [tool.ruff.lint]
92
+ select = ["ALL"]
93
+ ignore = [
94
+ "D105",
95
+ "D107",
96
+ "CPY001",
97
+ "TD003",
98
+ "FIX002",
99
+ "FBT003",
100
+ "ERA001",
101
+ "EM101",
102
+ "EM102",
103
+ "TRY003",
104
+ "D407",
105
+ "D413",
106
+ ]
107
+
108
+ [tool.ruff.lint.pydocstyle]
109
+ convention = "google"
110
+
111
+ [tool.ruff.lint.mccabe]
112
+ max-complexity = 12
113
+
114
+ [tool.ruff.lint.per-file-ignores]
115
+ "tests/*" = [
116
+ "S101",
117
+ "D100",
118
+ "D103",
119
+ "SLF001",
120
+ "ARG001",
121
+ "PLR2004",
122
+ "PLC0415",
123
+ ]
124
+ "tests/fixtures/*" = ["ALL"]
125
+ "*/__init__.py" = ["D104"]
126
+ "src/phoenix/adapters/generated/*.py" = ["ALL"]
127
+ "src/phoenix/cli/main.py" = ["ARG001", "B008", "FBT001", "PLR0913", "TC003"]
128
+ "src/phoenix/pipeline.py" = ["ANN401", "BLE001", "S112"]
129
+ "src/phoenix/adapters/base.py" = ["F401", "RUF001", "TC001", "UP035"]
130
+ "src/phoenix/adapters/generic.py" = ["ARG002"]
131
+ "src/phoenix/router.py" = ["PLC0415"]
132
+ "src/phoenix/plugins/loader.py" = ["ANN401", "PLC0415", "TC003", "TRY004", "TRY300"]
133
+ "src/phoenix/plugins/registry.py" = ["PYI034", "SLF001", "TC003"]
134
+ "tests/unit/test_plugin_loader.py" = ["ANN401", "ARG002", "D102", "PLW0108", "Q001", "TC001", "W605"]
135
+ "tests/unit/test_plugin_interface.py" = ["ANN401", "ARG002", "D102"]
136
+
137
+ [tool.mypy]
138
+ python_version = "3.11"
139
+ strict = true
140
+ warn_return_any = true
141
+ warn_unused_ignores = true
142
+ warn_unreachable = true
143
+ disallow_untyped_defs = true
144
+ disallow_incomplete_defs = true
145
+ check_untyped_defs = true
146
+ no_implicit_optional = true
147
+ warn_redundant_casts = true
148
+ warn_unused_configs = true
149
+ show_error_codes = true
150
+
151
+ [tool.pytest.ini_options]
152
+ asyncio_mode = "auto"
153
+ testpaths = ["tests"]
154
+ markers = [
155
+ "integration: marks tests as integration tests",
156
+ ]
157
+ addopts = "-ra"
158
+
159
+ [tool.coverage.run]
160
+ source = ["phoenix"]
161
+ branch = true
162
+
163
+ [tool.coverage.report]
164
+ fail_under = 85
165
+ skip_covered = false
166
+ show_missing = true
167
+ include = [
168
+ "src/phoenix/adapters/*.py",
169
+ "src/phoenix/collectors/*",
170
+ "src/phoenix/infrastructure/*.py",
171
+ "src/phoenix/models/*",
172
+ "src/phoenix/options.py",
173
+ "src/phoenix/exceptions.py",
174
+ "src/phoenix/plugins/*.py",
175
+ "src/phoenix/router.py",
176
+ "src/phoenix/version.py",
177
+ "src/phoenix/processing/ai_assistant.py",
178
+ "src/phoenix/processing/archiver.py",
179
+ "src/phoenix/processing/phoenix_ai_extractor.py",
180
+ "src/phoenix/intelligence/*.py",
181
+ "src/phoenix/pipeline.py",
182
+ "src/phoenix/engine.py",
183
+ "src/phoenix/scrapers/*.py",
184
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,41 @@
1
+ """Phoenix Engine -- universal pure web scraping engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from phoenix.adapters import (
6
+ BaseAdapter,
7
+ GenericWebAdapter,
8
+ PluginInterface,
9
+ ScraperPlugin,
10
+ )
11
+ from phoenix.engine import PhoenixEngine
12
+ from phoenix.models.output import (
13
+ CollectionResult,
14
+ ScrapingResult,
15
+ UnifiedOutput,
16
+ )
17
+ from phoenix.models.strategy import ScrapingStrategy
18
+
19
+ # Import options and core models before engine to prevent circular imports
20
+ # with router/pipeline submodules.
21
+ from phoenix.options import CollectionOptions, ScrapingOptions
22
+ from phoenix.plugins import PluginLoader, PluginManifest, PluginRegistry
23
+ from phoenix.version import __version__
24
+
25
+ __all__ = [
26
+ "BaseAdapter",
27
+ "CollectionOptions",
28
+ "CollectionResult",
29
+ "GenericWebAdapter",
30
+ "PhoenixEngine",
31
+ "PluginInterface",
32
+ "PluginLoader",
33
+ "PluginManifest",
34
+ "PluginRegistry",
35
+ "ScraperPlugin",
36
+ "ScrapingOptions",
37
+ "ScrapingResult",
38
+ "ScrapingStrategy",
39
+ "UnifiedOutput",
40
+ "__version__",
41
+ ]
@@ -0,0 +1,8 @@
1
+ """Entry point for ``python -m phoenix``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from phoenix.cli.main import app
6
+
7
+ if __name__ == "__main__":
8
+ app()
@@ -0,0 +1,25 @@
1
+ """Platform adapters that parse HTML into structured data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from phoenix.adapters.base import BaseAdapter, PluginInterface, ScraperPlugin
6
+ from phoenix.adapters.facebook import FacebookAdapter
7
+ from phoenix.adapters.generic import GenericWebAdapter
8
+ from phoenix.adapters.instagram import InstagramAdapter
9
+ from phoenix.adapters.linkedin import LinkedInAdapter
10
+ from phoenix.adapters.tiktok import TikTokAdapter
11
+ from phoenix.adapters.x_twitter import XTwitterAdapter
12
+ from phoenix.adapters.youtube import YouTubeAdapter
13
+
14
+ __all__ = [
15
+ "BaseAdapter",
16
+ "FacebookAdapter",
17
+ "GenericWebAdapter",
18
+ "InstagramAdapter",
19
+ "LinkedInAdapter",
20
+ "PluginInterface",
21
+ "ScraperPlugin",
22
+ "TikTokAdapter",
23
+ "XTwitterAdapter",
24
+ "YouTubeAdapter",
25
+ ]