search-parser 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- search_parser-0.0.1/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- search_parser-0.0.1/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
- search_parser-0.0.1/.github/dependabot.yml +16 -0
- search_parser-0.0.1/.github/pull_request_template.md +23 -0
- search_parser-0.0.1/.github/workflows/codeql.yml +101 -0
- search_parser-0.0.1/.github/workflows/coverage-badge.yml +40 -0
- search_parser-0.0.1/.github/workflows/lint.yml +31 -0
- search_parser-0.0.1/.github/workflows/publish.yml +26 -0
- search_parser-0.0.1/.github/workflows/test.yml +47 -0
- search_parser-0.0.1/.gitignore +49 -0
- search_parser-0.0.1/.pre-commit-config.yaml +18 -0
- search_parser-0.0.1/CHANGELOG.md +28 -0
- search_parser-0.0.1/CLAUDE.md +40 -0
- search_parser-0.0.1/CODE_OF_CONDUCT.md +40 -0
- search_parser-0.0.1/CONTRIBUTING.md +209 -0
- search_parser-0.0.1/LICENSE +190 -0
- search_parser-0.0.1/PKG-INFO +214 -0
- search_parser-0.0.1/README.md +172 -0
- search_parser-0.0.1/docs/adding_search_engine.md +92 -0
- search_parser-0.0.1/docs/api_reference.md +37 -0
- search_parser-0.0.1/docs/contributing.md +16 -0
- search_parser-0.0.1/docs/examples/advanced_usage.md +67 -0
- search_parser-0.0.1/docs/examples/basic_usage.md +46 -0
- search_parser-0.0.1/docs/getting_started.md +54 -0
- search_parser-0.0.1/docs/index.md +35 -0
- search_parser-0.0.1/examples/basic_parsing.py +55 -0
- search_parser-0.0.1/examples/batch_processing.py +40 -0
- search_parser-0.0.1/examples/custom_formatter.py +73 -0
- search_parser-0.0.1/mkdocs.yml +48 -0
- search_parser-0.0.1/pyproject.toml +151 -0
- search_parser-0.0.1/scripts/update_fixtures.py +40 -0
- search_parser-0.0.1/src/search_engine_parser/__init__.py +12 -0
- search_parser-0.0.1/src/search_engine_parser/__version__.py +3 -0
- search_parser-0.0.1/src/search_engine_parser/cli.py +79 -0
- search_parser-0.0.1/src/search_engine_parser/core/__init__.py +1 -0
- search_parser-0.0.1/src/search_engine_parser/core/detector.py +130 -0
- search_parser-0.0.1/src/search_engine_parser/core/models.py +33 -0
- search_parser-0.0.1/src/search_engine_parser/core/parser.py +124 -0
- search_parser-0.0.1/src/search_engine_parser/exceptions.py +21 -0
- search_parser-0.0.1/src/search_engine_parser/formatters/__init__.py +11 -0
- search_parser-0.0.1/src/search_engine_parser/formatters/base.py +27 -0
- search_parser-0.0.1/src/search_engine_parser/formatters/json_formatter.py +21 -0
- search_parser-0.0.1/src/search_engine_parser/formatters/markdown_formatter.py +119 -0
- search_parser-0.0.1/src/search_engine_parser/parsers/__init__.py +21 -0
- search_parser-0.0.1/src/search_engine_parser/parsers/base.py +79 -0
- search_parser-0.0.1/src/search_engine_parser/parsers/bing.py +132 -0
- search_parser-0.0.1/src/search_engine_parser/parsers/duckduckgo.py +137 -0
- search_parser-0.0.1/src/search_engine_parser/parsers/google.py +226 -0
- search_parser-0.0.1/src/search_engine_parser/utils.py +35 -0
- search_parser-0.0.1/tests/__init__.py +0 -0
- search_parser-0.0.1/tests/conftest.py +54 -0
- search_parser-0.0.1/tests/fixtures/bing/organic_results.html +30 -0
- search_parser-0.0.1/tests/fixtures/bing/search_github_repos.html +101 -0
- search_parser-0.0.1/tests/fixtures/duckduckgo/organic_results.html +25 -0
- search_parser-0.0.1/tests/fixtures/duckduckgo/search_github_repos.html +29 -0
- search_parser-0.0.1/tests/fixtures/google/featured_snippet.html +35 -0
- search_parser-0.0.1/tests/fixtures/google/knowledge_panel.html +28 -0
- search_parser-0.0.1/tests/fixtures/google/organic_results.html +36 -0
- search_parser-0.0.1/tests/fixtures/google/search_best_employee_scheduling_app.html +53 -0
- search_parser-0.0.1/tests/fixtures/google/search_github_repos.html +62 -0
- search_parser-0.0.1/tests/integration/__init__.py +0 -0
- search_parser-0.0.1/tests/integration/test_end_to_end.py +124 -0
- search_parser-0.0.1/tests/test_cli.py +64 -0
- search_parser-0.0.1/tests/test_coverage_boost.py +502 -0
- search_parser-0.0.1/tests/test_formatters.py +154 -0
- search_parser-0.0.1/tests/unit/__init__.py +0 -0
- search_parser-0.0.1/tests/unit/test_bing_parser.py +80 -0
- search_parser-0.0.1/tests/unit/test_detector.py +61 -0
- search_parser-0.0.1/tests/unit/test_duckduckgo_parser.py +106 -0
- search_parser-0.0.1/tests/unit/test_google_parser.py +124 -0
- search_parser-0.0.1/uv.lock +2056 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug Report
|
|
3
|
+
about: Report a bug to help us improve
|
|
4
|
+
title: "[BUG] "
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
**Describe the bug**
|
|
10
|
+
A clear description of what the bug is.
|
|
11
|
+
|
|
12
|
+
**To Reproduce**
|
|
13
|
+
Steps to reproduce:
|
|
14
|
+
1. Parse HTML from '...'
|
|
15
|
+
2. Use output format '...'
|
|
16
|
+
3. See error
|
|
17
|
+
|
|
18
|
+
**Expected behavior**
|
|
19
|
+
What you expected to happen.
|
|
20
|
+
|
|
21
|
+
**HTML Sample**
|
|
22
|
+
If applicable, provide a minimal HTML snippet that reproduces the issue.
|
|
23
|
+
|
|
24
|
+
**Environment:**
|
|
25
|
+
- OS: [e.g., Ubuntu 22.04]
|
|
26
|
+
- Python version: [e.g., 3.11]
|
|
27
|
+
- Package version: [e.g., 0.1.0]
|
|
28
|
+
|
|
29
|
+
**Additional context**
|
|
30
|
+
Any other context about the problem.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature Request
|
|
3
|
+
about: Suggest an idea for this project
|
|
4
|
+
title: "[FEATURE] "
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
**Is your feature request related to a problem?**
|
|
10
|
+
A clear description of the problem. For example: I'm frustrated when [...]
|
|
11
|
+
|
|
12
|
+
**Describe the solution you'd like**
|
|
13
|
+
What you want to happen.
|
|
14
|
+
|
|
15
|
+
**Describe alternatives you've considered**
|
|
16
|
+
Any alternative solutions or features you've considered.
|
|
17
|
+
|
|
18
|
+
**Additional context**
|
|
19
|
+
Any other context or screenshots about the feature request.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# To get started with Dependabot version updates, you'll need to specify which
|
|
2
|
+
# package ecosystems to update and where the package manifests are located.
|
|
3
|
+
# Please see the documentation for all configuration options:
|
|
4
|
+
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
|
5
|
+
|
|
6
|
+
version: 2
|
|
7
|
+
updates:
|
|
8
|
+
- package-ecosystem: "pip"
|
|
9
|
+
directory: "/"
|
|
10
|
+
schedule:
|
|
11
|
+
interval: "weekly"
|
|
12
|
+
|
|
13
|
+
- package-ecosystem: "github-actions"
|
|
14
|
+
directory: "/"
|
|
15
|
+
schedule:
|
|
16
|
+
interval: "weekly"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
## Description
|
|
2
|
+
|
|
3
|
+
Brief description of the changes.
|
|
4
|
+
|
|
5
|
+
## Type of Change
|
|
6
|
+
|
|
7
|
+
- [ ] Bug fix
|
|
8
|
+
- [ ] New feature
|
|
9
|
+
- [ ] Breaking change
|
|
10
|
+
- [ ] Documentation update
|
|
11
|
+
|
|
12
|
+
## Checklist
|
|
13
|
+
|
|
14
|
+
- [ ] My code follows the project's style guidelines
|
|
15
|
+
- [ ] I have added tests that prove my fix/feature works
|
|
16
|
+
- [ ] All new and existing tests pass (`uv run pytest`)
|
|
17
|
+
- [ ] Linting passes (`uv run ruff check .`)
|
|
18
|
+
- [ ] Type checking passes (`uv run mypy src/search_engine_parser`)
|
|
19
|
+
- [ ] I have updated documentation as needed
|
|
20
|
+
|
|
21
|
+
## Test Plan
|
|
22
|
+
|
|
23
|
+
How were these changes tested?
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# For most projects, this workflow file will not need changing; you simply need
|
|
2
|
+
# to commit it to your repository.
|
|
3
|
+
#
|
|
4
|
+
# You may wish to alter this file to override the set of languages analyzed,
|
|
5
|
+
# or to provide custom queries or build logic.
|
|
6
|
+
#
|
|
7
|
+
# ******** NOTE ********
|
|
8
|
+
# We have attempted to detect the languages in your repository. Please check
|
|
9
|
+
# the `language` matrix defined below to confirm you have the correct set of
|
|
10
|
+
# supported CodeQL languages.
|
|
11
|
+
#
|
|
12
|
+
name: "CodeQL Advanced"
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
branches: [ "main" ]
|
|
17
|
+
pull_request:
|
|
18
|
+
branches: [ "main" ]
|
|
19
|
+
schedule:
|
|
20
|
+
- cron: '35 10 * * 1'
|
|
21
|
+
|
|
22
|
+
jobs:
|
|
23
|
+
analyze:
|
|
24
|
+
name: Analyze (${{ matrix.language }})
|
|
25
|
+
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
|
26
|
+
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
|
27
|
+
# - https://gh.io/supported-runners-and-hardware-resources
|
|
28
|
+
# - https://gh.io/using-larger-runners (GitHub.com only)
|
|
29
|
+
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
|
|
30
|
+
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
|
|
31
|
+
permissions:
|
|
32
|
+
# required for all workflows
|
|
33
|
+
security-events: write
|
|
34
|
+
|
|
35
|
+
# required to fetch internal or private CodeQL packs
|
|
36
|
+
packages: read
|
|
37
|
+
|
|
38
|
+
# only required for workflows in private repositories
|
|
39
|
+
actions: read
|
|
40
|
+
contents: read
|
|
41
|
+
|
|
42
|
+
strategy:
|
|
43
|
+
fail-fast: false
|
|
44
|
+
matrix:
|
|
45
|
+
include:
|
|
46
|
+
- language: actions
|
|
47
|
+
build-mode: none
|
|
48
|
+
- language: python
|
|
49
|
+
build-mode: none
|
|
50
|
+
# CodeQL supports the following keyword values for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift'
|
|
51
|
+
# Use `c-cpp` to analyze code written in C, C++ or both
|
|
52
|
+
# Use 'java-kotlin' to analyze code written in Java, Kotlin or both
|
|
53
|
+
# Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
|
|
54
|
+
# To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
|
|
55
|
+
# see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
|
|
56
|
+
# If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
|
|
57
|
+
# your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
|
|
58
|
+
steps:
|
|
59
|
+
- name: Checkout repository
|
|
60
|
+
uses: actions/checkout@v4
|
|
61
|
+
|
|
62
|
+
# Add any setup steps before running the `github/codeql-action/init` action.
|
|
63
|
+
# This includes steps like installing compilers or runtimes (`actions/setup-node`
|
|
64
|
+
# or others). This is typically only required for manual builds.
|
|
65
|
+
# - name: Setup runtime (example)
|
|
66
|
+
# uses: actions/setup-example@v1
|
|
67
|
+
|
|
68
|
+
# Initializes the CodeQL tools for scanning.
|
|
69
|
+
- name: Initialize CodeQL
|
|
70
|
+
uses: github/codeql-action/init@v4
|
|
71
|
+
with:
|
|
72
|
+
languages: ${{ matrix.language }}
|
|
73
|
+
build-mode: ${{ matrix.build-mode }}
|
|
74
|
+
# If you wish to specify custom queries, you can do so here or in a config file.
|
|
75
|
+
# By default, queries listed here will override any specified in a config file.
|
|
76
|
+
# Prefix the list here with "+" to use these queries and those in the config file.
|
|
77
|
+
|
|
78
|
+
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
|
79
|
+
# queries: security-extended,security-and-quality
|
|
80
|
+
|
|
81
|
+
# If the analyze step fails for one of the languages you are analyzing with
|
|
82
|
+
# "We were unable to automatically build your code", modify the matrix above
|
|
83
|
+
# to set the build mode to "manual" for that language. Then modify this step
|
|
84
|
+
# to build your code.
|
|
85
|
+
# ℹ️ Command-line programs to run using the OS shell.
|
|
86
|
+
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
|
87
|
+
- name: Run manual build steps
|
|
88
|
+
if: matrix.build-mode == 'manual'
|
|
89
|
+
shell: bash
|
|
90
|
+
run: |
|
|
91
|
+
echo 'If you are using a "manual" build mode for one or more of the' \
|
|
92
|
+
'languages you are analyzing, replace this with the commands to build' \
|
|
93
|
+
'your code, for example:'
|
|
94
|
+
echo ' make bootstrap'
|
|
95
|
+
echo ' make release'
|
|
96
|
+
exit 1
|
|
97
|
+
|
|
98
|
+
- name: Perform CodeQL Analysis
|
|
99
|
+
uses: github/codeql-action/analyze@v4
|
|
100
|
+
with:
|
|
101
|
+
category: "/language:${{matrix.language}}"
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: Coverage Badge
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
coverage:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v6
|
|
12
|
+
|
|
13
|
+
- name: Install uv
|
|
14
|
+
uses: astral-sh/setup-uv@v7
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
run: uv python install 3.11
|
|
18
|
+
|
|
19
|
+
- name: Install dependencies
|
|
20
|
+
run: uv sync --all-extras
|
|
21
|
+
|
|
22
|
+
- name: Run tests with coverage
|
|
23
|
+
run: |
|
|
24
|
+
uv run pytest --cov=search_engine_parser --cov-report=term --cov-report=json
|
|
25
|
+
COVERAGE=$(python -c "import json; print(int(float(json.load(open('coverage.json'))['totals']['percent_covered_display'])))")
|
|
26
|
+
echo "COVERAGE=${COVERAGE}%" >> $GITHUB_ENV
|
|
27
|
+
if [ "$COVERAGE" -ge 90 ]; then echo "COVERAGE_COLOR=brightgreen" >> $GITHUB_ENV;
|
|
28
|
+
elif [ "$COVERAGE" -ge 70 ]; then echo "COVERAGE_COLOR=yellow" >> $GITHUB_ENV;
|
|
29
|
+
else echo "COVERAGE_COLOR=red" >> $GITHUB_ENV; fi
|
|
30
|
+
|
|
31
|
+
- name: Create coverage badge
|
|
32
|
+
uses: schneegans/dynamic-badges-action@v1.7.0
|
|
33
|
+
with:
|
|
34
|
+
auth: ${{ secrets.GIST_SECRET }}
|
|
35
|
+
gistID: ${{ vars.GIST_ID }}
|
|
36
|
+
filename: search-parser-coverage.json
|
|
37
|
+
label: coverage
|
|
38
|
+
message: ${{ env.COVERAGE }}
|
|
39
|
+
color: ${{ env.COVERAGE_COLOR }}
|
|
40
|
+
namedLogo: python
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Lint
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v6
|
|
14
|
+
|
|
15
|
+
- name: Install uv
|
|
16
|
+
uses: astral-sh/setup-uv@v7
|
|
17
|
+
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
run: uv python install 3.11
|
|
20
|
+
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: uv sync --all-extras
|
|
23
|
+
|
|
24
|
+
- name: Run ruff check
|
|
25
|
+
run: uv run ruff check .
|
|
26
|
+
|
|
27
|
+
- name: Run ruff format check
|
|
28
|
+
run: uv run ruff format --check .
|
|
29
|
+
|
|
30
|
+
- name: Run mypy
|
|
31
|
+
run: uv run mypy src/search_engine_parser
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
deploy:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
id-token: write
|
|
12
|
+
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v6
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@v7
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
run: uv python install 3.11
|
|
21
|
+
|
|
22
|
+
- name: Build package
|
|
23
|
+
run: uv build
|
|
24
|
+
|
|
25
|
+
- name: Publish to PyPI
|
|
26
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v6
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v7
|
|
21
|
+
with:
|
|
22
|
+
enable-cache: true
|
|
23
|
+
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
run: uv python install ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --all-extras
|
|
29
|
+
|
|
30
|
+
- name: Run tests with coverage
|
|
31
|
+
run: uv run pytest --cov=search_engine_parser --cov-report=xml --cov-report=term --cov-report=html
|
|
32
|
+
|
|
33
|
+
- name: Upload coverage to Codecov
|
|
34
|
+
uses: codecov/codecov-action@v5
|
|
35
|
+
if: matrix.python-version == '3.11'
|
|
36
|
+
with:
|
|
37
|
+
file: ./coverage.xml
|
|
38
|
+
fail_ci_if_error: true
|
|
39
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
40
|
+
|
|
41
|
+
- name: Upload coverage HTML artifact
|
|
42
|
+
uses: actions/upload-artifact@v6
|
|
43
|
+
if: matrix.python-version == '3.11'
|
|
44
|
+
with:
|
|
45
|
+
name: coverage-report
|
|
46
|
+
path: htmlcov/
|
|
47
|
+
retention-days: 30
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.egg
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
eggs/
|
|
11
|
+
*.whl
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
ENV/
|
|
17
|
+
.env
|
|
18
|
+
|
|
19
|
+
# IDE
|
|
20
|
+
.idea/
|
|
21
|
+
.vscode/
|
|
22
|
+
*.swp
|
|
23
|
+
*.swo
|
|
24
|
+
*~
|
|
25
|
+
|
|
26
|
+
# Testing
|
|
27
|
+
htmlcov/
|
|
28
|
+
.coverage
|
|
29
|
+
.coverage.*
|
|
30
|
+
coverage.xml
|
|
31
|
+
*.cover
|
|
32
|
+
.pytest_cache/
|
|
33
|
+
|
|
34
|
+
# mypy
|
|
35
|
+
.mypy_cache/
|
|
36
|
+
|
|
37
|
+
# ruff
|
|
38
|
+
.ruff_cache/
|
|
39
|
+
|
|
40
|
+
# Documentation
|
|
41
|
+
site/
|
|
42
|
+
|
|
43
|
+
# OS
|
|
44
|
+
.DS_Store
|
|
45
|
+
Thumbs.db
|
|
46
|
+
|
|
47
|
+
# uv
|
|
48
|
+
|
|
49
|
+
search_parser_prompt.md
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.1.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
10
|
+
rev: v1.7.0
|
|
11
|
+
hooks:
|
|
12
|
+
- id: mypy
|
|
13
|
+
additional_dependencies:
|
|
14
|
+
- pydantic>=2.0.0
|
|
15
|
+
- types-beautifulsoup4
|
|
16
|
+
- lxml-stubs
|
|
17
|
+
args: [--strict]
|
|
18
|
+
files: ^src/
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2025-01-01
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Initial release of `search-parser`.
|
|
15
|
+
- Core parsing framework with `BaseParser` and `SearchResult` data model.
|
|
16
|
+
- Google search results parser with auto-detection.
|
|
17
|
+
- Bing search results parser with auto-detection.
|
|
18
|
+
- DuckDuckGo search results parser with auto-detection.
|
|
19
|
+
- Auto-detection of search engine from raw HTML content.
|
|
20
|
+
- Three output formats: JSON, Markdown, and Python dict.
|
|
21
|
+
- Command-line interface (`search-parser parse`).
|
|
22
|
+
- Comprehensive test suite with HTML fixtures.
|
|
23
|
+
- Documentation site using MkDocs with Material theme.
|
|
24
|
+
- CI/CD workflows for testing, linting, coverage, and publishing.
|
|
25
|
+
- Pre-commit hooks for ruff and mypy.
|
|
26
|
+
|
|
27
|
+
[Unreleased]: https://github.com/getlinksc/search-parser/compare/v0.1.0...HEAD
|
|
28
|
+
[0.1.0]: https://github.com/getlinksc/search-parser/releases/tag/v0.1.0
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# CLAUDE.md - Search Engine Parser
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
Python library for parsing search engine HTML results into structured data (JSON, Markdown, dict).
|
|
5
|
+
|
|
6
|
+
## Architecture
|
|
7
|
+
- **Strategy Pattern**: Each search engine has its own parser implementing `BaseParser`
|
|
8
|
+
- **Plugin Architecture**: New engines added by creating a parser class and registering it
|
|
9
|
+
- **Layers**: Detection → Parsing → Formatting
|
|
10
|
+
|
|
11
|
+
## Package Manager
|
|
12
|
+
This project uses `uv` for fast, modern Python package management.
|
|
13
|
+
|
|
14
|
+
## Common Commands
|
|
15
|
+
```bash
|
|
16
|
+
uv sync --all-extras # Install/sync all dependencies
|
|
17
|
+
uv run pytest # Run tests
|
|
18
|
+
uv run pytest --cov=search_engine_parser --cov-report=term # Tests with coverage
|
|
19
|
+
uv run ruff check . # Lint code
|
|
20
|
+
uv run ruff format . # Format code
|
|
21
|
+
uv run mypy src/search_engine_parser # Type check
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Adding a New Search Engine Parser
|
|
25
|
+
1. Create `src/search_engine_parser/parsers/engine_name.py`
|
|
26
|
+
2. Implement a class extending `BaseParser` that provides `engine_name`, `parse()`, and `can_parse()`
|
|
27
|
+
3. Register in `src/search_engine_parser/parsers/__init__.py`
|
|
28
|
+
4. Add HTML test fixtures in `tests/fixtures/engine_name/`
|
|
29
|
+
5. Write unit tests in `tests/unit/test_engine_name_parser.py`
|
|
30
|
+
|
|
31
|
+
## Coding Standards
|
|
32
|
+
- Full type hints on all functions (mypy --strict)
|
|
33
|
+
- Google-style docstrings on public APIs
|
|
34
|
+
- `ruff check` + `ruff format` (line-length 100)
|
|
35
|
+
- Functions under 15 lines when possible
|
|
36
|
+
|
|
37
|
+
## Testing
|
|
38
|
+
- pytest with pytest-cov, target >90% coverage
|
|
39
|
+
- HTML fixtures in tests/fixtures/ for each engine
|
|
40
|
+
- Unit tests per parser, integration tests for full workflows
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, caste, color, religion, or sexual
|
|
10
|
+
identity and orientation.
|
|
11
|
+
|
|
12
|
+
## Our Standards
|
|
13
|
+
|
|
14
|
+
Examples of behavior that contributes to a positive environment:
|
|
15
|
+
|
|
16
|
+
* Using welcoming and inclusive language
|
|
17
|
+
* Being respectful of differing viewpoints and experiences
|
|
18
|
+
* Gracefully accepting constructive criticism
|
|
19
|
+
* Focusing on what is best for the community
|
|
20
|
+
* Showing empathy towards other community members
|
|
21
|
+
|
|
22
|
+
Examples of unacceptable behavior:
|
|
23
|
+
|
|
24
|
+
* The use of sexualized language or imagery, and sexual attention or advances of any kind
|
|
25
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
|
26
|
+
* Public or private harassment
|
|
27
|
+
* Publishing others' private information without explicit permission
|
|
28
|
+
* Other conduct which could reasonably be considered inappropriate in a professional setting
|
|
29
|
+
|
|
30
|
+
## Enforcement
|
|
31
|
+
|
|
32
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
33
|
+
reported to the project maintainers. All complaints will be reviewed and
|
|
34
|
+
investigated promptly and fairly.
|
|
35
|
+
|
|
36
|
+
## Attribution
|
|
37
|
+
|
|
38
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/),
|
|
39
|
+
version 2.1, available at
|
|
40
|
+
https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
|