inscriptis 2.6.0__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inscriptis-2.7.0/.coveragerc +2 -0
- inscriptis-2.7.0/.git-blame-ignore-revs +2 -0
- inscriptis-2.7.0/.github/dependabot.yml +18 -0
- inscriptis-2.7.0/.github/workflows/codeql-analysis.yml +68 -0
- inscriptis-2.7.0/.github/workflows/create-container.yml +35 -0
- inscriptis-2.7.0/.github/workflows/helm-release.yaml +20 -0
- inscriptis-2.7.0/.github/workflows/python-package.yml +29 -0
- inscriptis-2.7.0/.gitignore +30 -0
- inscriptis-2.7.0/.readthedocs.yaml +35 -0
- inscriptis-2.7.0/.safety-project.ini +5 -0
- inscriptis-2.7.0/CONTRIBUTING.md +54 -0
- inscriptis-2.7.0/Dockerfile +24 -0
- {inscriptis-2.6.0 → inscriptis-2.7.0}/PKG-INFO +116 -99
- {inscriptis-2.6.0 → inscriptis-2.7.0}/README.rst +101 -83
- inscriptis-2.7.0/RENDERING.md +244 -0
- inscriptis-2.7.0/TODO.txt +5 -0
- inscriptis-2.7.0/benchmarking/a +113 -0
- inscriptis-2.7.0/benchmarking/b +3 -0
- inscriptis-2.7.0/benchmarking/run_benchmarking.py +369 -0
- inscriptis-2.7.0/benchmarking/speed_comparisons.txt +113 -0
- inscriptis-2.7.0/benchmarking/url_list.txt +18 -0
- inscriptis-2.7.0/docker-compose.yml +14 -0
- inscriptis-2.7.0/docs/Makefile +20 -0
- inscriptis-2.7.0/docs/README.rst +1 -0
- inscriptis-2.7.0/docs/api.rst +76 -0
- inscriptis-2.7.0/docs/benchmarking.rst +57 -0
- inscriptis-2.7.0/docs/conf.py +201 -0
- inscriptis-2.7.0/docs/contributing.md +1 -0
- inscriptis-2.7.0/docs/images/stackoverflow-code-annotation.png +0 -0
- inscriptis-2.7.0/docs/images/wikipedia-chur-entry-annotation.png +0 -0
- inscriptis-2.7.0/docs/images/wikipedia-chur-table-annotation.png +0 -0
- inscriptis-2.7.0/docs/images/xda-posts-annotation.png +0 -0
- inscriptis-2.7.0/docs/index.rst +29 -0
- inscriptis-2.7.0/docs/paper/Makefile +6 -0
- inscriptis-2.7.0/docs/paper/images/annotations.png +0 -0
- inscriptis-2.7.0/docs/paper/images/inscriptis-vs-lynx.png +0 -0
- inscriptis-2.7.0/docs/paper/images/inscriptis-vs-lynx.xcf +0 -0
- inscriptis-2.7.0/docs/paper/images/raw/inscriptis.png +0 -0
- inscriptis-2.7.0/docs/paper/images/raw/lynx.png +0 -0
- inscriptis-2.7.0/docs/paper/paper.bib +515 -0
- inscriptis-2.7.0/docs/paper/paper.md +82 -0
- inscriptis-2.7.0/docs/requirements.txt +4 -0
- inscriptis-2.7.0/examples/annotation/annotation-profile.json +14 -0
- inscriptis-2.7.0/examples/annotation/stackoverflow.json +14 -0
- inscriptis-2.7.0/examples/annotation/table-annotation-profile.json +7 -0
- inscriptis-2.7.0/examples/annotation/unittest.json +7 -0
- inscriptis-2.7.0/examples/annotation/wikipedia-entities-and-citations.json +5 -0
- inscriptis-2.7.0/examples/annotation/wikipedia.json +12 -0
- inscriptis-2.7.0/examples/annotation/xda-developers.json +6 -0
- inscriptis-2.7.0/examples/custom-html-handling.py +41 -0
- inscriptis-2.7.0/img/nested-table-firefox.png +0 -0
- inscriptis-2.7.0/img/wikipedia-chur-firefox.png +0 -0
- inscriptis-2.7.0/img/wikipedia-python-example.png +0 -0
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/__init__.py +16 -11
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/annotation/__init__.py +4 -6
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/annotation/output/__init__.py +3 -2
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/annotation/output/html.py +16 -16
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/annotation/output/surface.py +6 -8
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/annotation/output/xml.py +2 -2
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/annotation/parser.py +9 -6
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/cli/inscript.py +25 -46
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/css_profiles.py +8 -14
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/html_engine.py +28 -26
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/metadata.py +2 -3
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/attribute.py +7 -11
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/canvas/__init__.py +8 -6
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/canvas/block.py +8 -7
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/canvas/prefix.py +7 -7
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/config.py +52 -8
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/css.py +7 -6
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/html_document_state.py +10 -5
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/html_element.py +39 -31
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/table.py +33 -50
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/tag/__init__.py +7 -4
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/tag/a_tag.py +1 -2
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/tag/br_tag.py +1 -2
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/tag/img_tag.py +2 -5
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/tag/list_tag.py +8 -4
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/tag/table_tag.py +5 -8
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/service/web.py +6 -10
- inscriptis-2.7.0/publish.sh +38 -0
- inscriptis-2.7.0/pyproject.toml +137 -0
- inscriptis-2.7.0/tests/__init__.py +0 -0
- inscriptis-2.7.0/tests/data/annotation-profile-unittest.json +7 -0
- inscriptis-2.7.0/tests/html/advanced-prefix-test.html +19 -0
- inscriptis-2.7.0/tests/html/advanced-prefix-test.txt +13 -0
- inscriptis-2.7.0/tests/html/br-in-table.html +10 -0
- inscriptis-2.7.0/tests/html/br-in-table.txt +3 -0
- inscriptis-2.7.0/tests/html/br-in-table2.html +22 -0
- inscriptis-2.7.0/tests/html/br-li.html +6 -0
- inscriptis-2.7.0/tests/html/br-li.txt +4 -0
- inscriptis-2.7.0/tests/html/br.html +2 -0
- inscriptis-2.7.0/tests/html/br.txt +2 -0
- inscriptis-2.7.0/tests/html/direct-enumeration.html +13 -0
- inscriptis-2.7.0/tests/html/direct-enumeration.txt +7 -0
- inscriptis-2.7.0/tests/html/empty-table.html +3 -0
- inscriptis-2.7.0/tests/html/empty-table.txt +1 -0
- inscriptis-2.7.0/tests/html/enumeration-multiple-value.html +8 -0
- inscriptis-2.7.0/tests/html/enumeration-multiple-value.txt +6 -0
- inscriptis-2.7.0/tests/html/enumeration-value.html +8 -0
- inscriptis-2.7.0/tests/html/enumeration-value.txt +6 -0
- inscriptis-2.7.0/tests/html/enumerations.html +14 -0
- inscriptis-2.7.0/tests/html/enumerations.txt +8 -0
- inscriptis-2.7.0/tests/html/html-comment-ofuscation.html +1 -0
- inscriptis-2.7.0/tests/html/html-comment-ofuscation.txt +1 -0
- inscriptis-2.7.0/tests/html/invalid-table.html +14 -0
- inscriptis-2.7.0/tests/html/invalid-table.txt +3 -0
- inscriptis-2.7.0/tests/html/invalid-table2.html +10 -0
- inscriptis-2.7.0/tests/html/invalid-table2.txt +4 -0
- inscriptis-2.7.0/tests/html/invalid-table3.html +10 -0
- inscriptis-2.7.0/tests/html/invalid-table3.txt +4 -0
- inscriptis-2.7.0/tests/html/invisible.html +4 -0
- inscriptis-2.7.0/tests/html/invisible.txt +1 -0
- inscriptis-2.7.0/tests/html/invisible2.html +2 -0
- inscriptis-2.7.0/tests/html/invisible2.txt +1 -0
- inscriptis-2.7.0/tests/html/invisible3.html +2 -0
- inscriptis-2.7.0/tests/html/invisible3.txt +0 -0
- inscriptis-2.7.0/tests/html/nested-list.html +36 -0
- inscriptis-2.7.0/tests/html/nested-list.txt +21 -0
- inscriptis-2.7.0/tests/html/nested-table-alignment-css.html +23 -0
- inscriptis-2.7.0/tests/html/nested-table-alignment-css.txt +7 -0
- inscriptis-2.7.0/tests/html/nested-table-alignment.html +23 -0
- inscriptis-2.7.0/tests/html/nested-table-alignment.txt +7 -0
- inscriptis-2.7.0/tests/html/nested-table.html +19 -0
- inscriptis-2.7.0/tests/html/nested-table.txt +5 -0
- inscriptis-2.7.0/tests/html/p-br.html +9 -0
- inscriptis-2.7.0/tests/html/p-br.txt +11 -0
- inscriptis-2.7.0/tests/html/pre.html +22 -0
- inscriptis-2.7.0/tests/html/pre.txt +20 -0
- inscriptis-2.7.0/tests/html/real-world/avantec-team.html +903 -0
- inscriptis-2.7.0/tests/html/real-world/naturgruen-team.html +177 -0
- inscriptis-2.7.0/tests/html/real-world/rswag-mitarbeiter.html +811 -0
- inscriptis-2.7.0/tests/html/stackoverflow-list-snippet.html +34 -0
- inscriptis-2.7.0/tests/html/stackoverflow-list-snippet.txt +2 -0
- inscriptis-2.7.0/tests/html/subsequent-headings.html +24 -0
- inscriptis-2.7.0/tests/html/subsequent-headings.json +18 -0
- inscriptis-2.7.0/tests/html/subsequent-headings.txt +21 -0
- inscriptis-2.7.0/tests/html/table-alignment.html +8 -0
- inscriptis-2.7.0/tests/html/table-alignment.txt +4 -0
- inscriptis-2.7.0/tests/html/table-empty-row.html +9 -0
- inscriptis-2.7.0/tests/html/table-empty-row.txt +5 -0
- inscriptis-2.7.0/tests/html/table-in-table.html +48 -0
- inscriptis-2.7.0/tests/html/table-in-table.json +25 -0
- inscriptis-2.7.0/tests/html/table-in-table.txt +29 -0
- inscriptis-2.7.0/tests/html/table-itemize.html +7 -0
- inscriptis-2.7.0/tests/html/table-itemize.txt +4 -0
- inscriptis-2.7.0/tests/html/table-pre.html +36 -0
- inscriptis-2.7.0/tests/html/table-pre.txt +12 -0
- inscriptis-2.7.0/tests/html/table.html +10 -0
- inscriptis-2.7.0/tests/html/table.json +20 -0
- inscriptis-2.7.0/tests/html/table.txt +2 -0
- inscriptis-2.7.0/tests/html/td-only-table.html +5 -0
- inscriptis-2.7.0/tests/html/td-only-table.txt +1 -0
- inscriptis-2.7.0/tests/html/test.html +123 -0
- inscriptis-2.7.0/tests/html/tr-only-table.html +5 -0
- inscriptis-2.7.0/tests/html/tr-only-table.txt +3 -0
- inscriptis-2.7.0/tests/html/whitespace.html +9 -0
- inscriptis-2.7.0/tests/html/whitespace.txt +4 -0
- inscriptis-2.7.0/tests/html/wikipedia-code.html +14 -0
- inscriptis-2.7.0/tests/html/wikipedia-code.txt +16 -0
- inscriptis-2.7.0/tests/html/wikipedia-consequtive-links-and-umlauts.html +16 -0
- inscriptis-2.7.0/tests/html/wikipedia-consequtive-links-and-umlauts.txt +1 -0
- inscriptis-2.7.0/tests/html/wikipedia-consequtive-tables.html +243 -0
- inscriptis-2.7.0/tests/html/wikipedia-consequtive-tables.json +32 -0
- inscriptis-2.7.0/tests/html/wikipedia-enumeration-annotation.html +66 -0
- inscriptis-2.7.0/tests/html/wikipedia-enumeration-annotation.json +19 -0
- inscriptis-2.7.0/tests/html/wikipedia-enumeration-annotation.txt +45 -0
- inscriptis-2.7.0/tests/html/wikipedia-enumeration.html +61 -0
- inscriptis-2.7.0/tests/html/wikipedia-enumeration.txt +39 -0
- inscriptis-2.7.0/tests/html/wikipedia-equation.html +10 -0
- inscriptis-2.7.0/tests/html/wikipedia-equation.txt +7 -0
- inscriptis-2.7.0/tests/html/wikipedia-table-bordercase-verticial-alignmnet.html +28 -0
- inscriptis-2.7.0/tests/html/wikipedia-table-bordercase-verticial-alignmnet.json +31 -0
- inscriptis-2.7.0/tests/html/wikipedia-table-bordercase1.html +21 -0
- inscriptis-2.7.0/tests/html/wikipedia-table-bordercase1.json +21 -0
- inscriptis-2.7.0/tests/html/wikipedia-table.html +33 -0
- inscriptis-2.7.0/tests/html/wikipedia-table.json +33 -0
- inscriptis-2.7.0/tests/html/wikipedia-table.txt +9 -0
- inscriptis-2.7.0/tests/test_annotation.py +70 -0
- inscriptis-2.7.0/tests/test_annotation_engine.py +22 -0
- inscriptis-2.7.0/tests/test_annotation_output_processor.py +85 -0
- inscriptis-2.7.0/tests/test_annotation_output_xml.py +73 -0
- inscriptis-2.7.0/tests/test_annotation_rule_parsing.py +74 -0
- inscriptis-2.7.0/tests/test_block.py +66 -0
- inscriptis-2.7.0/tests/test_broken_table_handling.py +23 -0
- inscriptis-2.7.0/tests/test_cli.py +122 -0
- inscriptis-2.7.0/tests/test_custom_html_tag_handling.py +31 -0
- inscriptis-2.7.0/tests/test_double_a.py +16 -0
- inscriptis-2.7.0/tests/test_empty_string.py +16 -0
- inscriptis-2.7.0/tests/test_engine.py +11 -0
- inscriptis-2.7.0/tests/test_html_conversion_options.py +72 -0
- inscriptis-2.7.0/tests/test_html_snippets.py +48 -0
- inscriptis-2.7.0/tests/test_html_snippets_annotations.py +63 -0
- inscriptis-2.7.0/tests/test_invalid_float_specification.py +16 -0
- inscriptis-2.7.0/tests/test_limit_whitespace_affixes.py +62 -0
- inscriptis-2.7.0/tests/test_list_div.py +29 -0
- inscriptis-2.7.0/tests/test_list_value.py +32 -0
- inscriptis-2.7.0/tests/test_margin_before_at_start.py +26 -0
- inscriptis-2.7.0/tests/test_margin_handling.py +36 -0
- inscriptis-2.7.0/tests/test_metadata.py +21 -0
- inscriptis-2.7.0/tests/test_model_html_element_canvas.py +55 -0
- inscriptis-2.7.0/tests/test_model_prefix.py +54 -0
- inscriptis-2.7.0/tests/test_parse_css.py +65 -0
- inscriptis-2.7.0/tests/test_strip_xml_header.py +10 -0
- inscriptis-2.7.0/tests/test_style_parsing.py +15 -0
- inscriptis-2.7.0/tests/test_table_cell.py +47 -0
- inscriptis-2.7.0/tests/test_table_cell_formatting.py +47 -0
- inscriptis-2.7.0/tests/test_table_row.py +26 -0
- inscriptis-2.7.0/tests/test_web_service.py +44 -0
- inscriptis-2.7.0/tests/test_white_space_handling.py +72 -0
- inscriptis-2.7.0/uv.lock +1399 -0
- inscriptis-2.6.0/pyproject.toml +0 -66
- {inscriptis-2.6.0 → inscriptis-2.7.0}/AUTHORS +0 -0
- {inscriptis-2.6.0 → inscriptis-2.7.0}/LICENSE +0 -0
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/cli/__init__.py +0 -0
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/html_properties.py +0 -0
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/model/__init__.py +0 -0
- {inscriptis-2.6.0/src → inscriptis-2.7.0}/inscriptis/service/__init__.py +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
|
|
3
|
+
updates:
|
|
4
|
+
|
|
5
|
+
# Enable version updates for github actions.
|
|
6
|
+
- package-ecosystem: "github-actions"
|
|
7
|
+
directory: "/"
|
|
8
|
+
schedule:
|
|
9
|
+
# Check for updates to GitHub Actions once a week
|
|
10
|
+
interval: "weekly"
|
|
11
|
+
|
|
12
|
+
# Enable version updates for Docker.
|
|
13
|
+
- package-ecosystem: "docker"
|
|
14
|
+
# Look for a `Dockerfile` in the `root` directory
|
|
15
|
+
directory: "/"
|
|
16
|
+
# Check for updates once a week
|
|
17
|
+
schedule:
|
|
18
|
+
interval: "weekly"
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# For most projects, this workflow file will not need changing; you simply need
|
|
2
|
+
# to commit it to your repository.
|
|
3
|
+
#
|
|
4
|
+
# You may wish to alter this file to override the set of languages analyzed,
|
|
5
|
+
# or to provide custom queries or build logic.
|
|
6
|
+
#
|
|
7
|
+
# ******** NOTE ********
|
|
8
|
+
# We have attempted to detect the languages in your repository. Please check
|
|
9
|
+
# the `language` matrix defined below to confirm you have the correct set of
|
|
10
|
+
# supported CodeQL languages.
|
|
11
|
+
#
|
|
12
|
+
name: "CodeQL"
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
push:
|
|
16
|
+
pull_request:
|
|
17
|
+
schedule:
|
|
18
|
+
- cron: '26 5 * * 2'
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
analyze:
|
|
22
|
+
name: Analyze
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
permissions:
|
|
25
|
+
actions: read
|
|
26
|
+
contents: read
|
|
27
|
+
security-events: write
|
|
28
|
+
|
|
29
|
+
strategy:
|
|
30
|
+
fail-fast: false
|
|
31
|
+
matrix:
|
|
32
|
+
language: [ 'python' ]
|
|
33
|
+
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
|
|
34
|
+
# Learn more:
|
|
35
|
+
# https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
|
|
36
|
+
|
|
37
|
+
steps:
|
|
38
|
+
- name: Checkout repository
|
|
39
|
+
uses: actions/checkout@v3
|
|
40
|
+
|
|
41
|
+
# Initializes the CodeQL tools for scanning.
|
|
42
|
+
- name: Initialize CodeQL
|
|
43
|
+
uses: github/codeql-action/init@v2
|
|
44
|
+
with:
|
|
45
|
+
languages: ${{ matrix.language }}
|
|
46
|
+
# If you wish to specify custom queries, you can do so here or in a config file.
|
|
47
|
+
# By default, queries listed here will override any specified in a config file.
|
|
48
|
+
# Prefix the list here with "+" to use these queries and those in the config file.
|
|
49
|
+
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
|
50
|
+
|
|
51
|
+
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
|
52
|
+
# If this step fails, then you should remove it and run the build manually (see below)
|
|
53
|
+
- name: Autobuild
|
|
54
|
+
uses: github/codeql-action/autobuild@v2
|
|
55
|
+
|
|
56
|
+
# ℹ️ Command-line programs to run using the OS shell.
|
|
57
|
+
# 📚 https://git.io/JvXDl
|
|
58
|
+
|
|
59
|
+
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
|
60
|
+
# and modify them (or add more) to build your code if your project
|
|
61
|
+
# uses a compiled language
|
|
62
|
+
|
|
63
|
+
#- run: |
|
|
64
|
+
# make bootstrap
|
|
65
|
+
# make release
|
|
66
|
+
|
|
67
|
+
- name: Perform CodeQL Analysis
|
|
68
|
+
uses: github/codeql-action/analyze@v2
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: container
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- '*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- name: Checkout code
|
|
13
|
+
uses: actions/checkout@v3
|
|
14
|
+
|
|
15
|
+
- name: get version
|
|
16
|
+
id: version
|
|
17
|
+
run: echo ::set-output name=APP_VERSION::${GITHUB_REF/refs\/tags\//}
|
|
18
|
+
|
|
19
|
+
- name: init docker build
|
|
20
|
+
uses: docker/setup-buildx-action@v2
|
|
21
|
+
|
|
22
|
+
- name: login docker
|
|
23
|
+
uses: docker/login-action@v2
|
|
24
|
+
with:
|
|
25
|
+
registry: ghcr.io
|
|
26
|
+
username: ${{ github.actor }}
|
|
27
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
28
|
+
|
|
29
|
+
- name: publish container
|
|
30
|
+
uses: docker/build-push-action@v4
|
|
31
|
+
with:
|
|
32
|
+
push: true
|
|
33
|
+
tags: |
|
|
34
|
+
ghcr.io/weblyzard/inscriptis:v${{ steps.version.outputs.APP_VERSION }}
|
|
35
|
+
ghcr.io/weblyzard/inscriptis:latest
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
name: helm release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- PhilippKuntschik-patch-2
|
|
7
|
+
tags:
|
|
8
|
+
- '*'
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
dispatch_helm_release:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- name: dispatch inscriptis-helm
|
|
15
|
+
uses: peter-evans/repository-dispatch@v2
|
|
16
|
+
with:
|
|
17
|
+
token: ${{ secrets.HELMREPO_ACCESS_TOKEN }}
|
|
18
|
+
repository: weblyzard/inscriptis-helm
|
|
19
|
+
event-type: tag-released
|
|
20
|
+
client-payload: '{"ref": "${{ github.ref_name }}"}'
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: build
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
build:
|
|
9
|
+
|
|
10
|
+
runs-on: ubuntu-24.04
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: [ '3.10', '3.11', '3.12', '3.13' ]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v3
|
|
18
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
19
|
+
uses: actions/setup-python@v4
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
- name: Install build environment
|
|
23
|
+
run: |
|
|
24
|
+
python -m pip install --upgrade pip
|
|
25
|
+
python -m pip install uv
|
|
26
|
+
- name: Build and test with uv.
|
|
27
|
+
run: |
|
|
28
|
+
uv run ruff check
|
|
29
|
+
uv build
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
*.pyc
|
|
2
|
+
*.pyx
|
|
3
|
+
.*.swp
|
|
4
|
+
*.egg-info
|
|
5
|
+
__pycache__/
|
|
6
|
+
benchmarking_results/
|
|
7
|
+
html_cache/
|
|
8
|
+
.tox
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
.cache/
|
|
12
|
+
.project
|
|
13
|
+
.pydevproject
|
|
14
|
+
.settings/
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
_build/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.idea/
|
|
20
|
+
venv/
|
|
21
|
+
tests/converted.txt
|
|
22
|
+
tests/reference.txt
|
|
23
|
+
*.c
|
|
24
|
+
docs/paper/*.pdf
|
|
25
|
+
htmlcov/
|
|
26
|
+
poetry.lock
|
|
27
|
+
|
|
28
|
+
# test
|
|
29
|
+
converted.txt
|
|
30
|
+
reference.txt
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Read the Docs configuration file for Sphinx projects
|
|
2
|
+
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
|
3
|
+
|
|
4
|
+
# Required
|
|
5
|
+
version: 2
|
|
6
|
+
|
|
7
|
+
# Set the OS, Python version and other tools you might need
|
|
8
|
+
build:
|
|
9
|
+
os: ubuntu-22.04
|
|
10
|
+
tools:
|
|
11
|
+
python: "3.12"
|
|
12
|
+
# You can also specify other tool versions:
|
|
13
|
+
# nodejs: "20"
|
|
14
|
+
# rust: "1.70"
|
|
15
|
+
# golang: "1.20"
|
|
16
|
+
|
|
17
|
+
# Build documentation in the "docs/" directory with Sphinx
|
|
18
|
+
sphinx:
|
|
19
|
+
configuration: docs/conf.py
|
|
20
|
+
# You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
|
|
21
|
+
# builder: "dirhtml"
|
|
22
|
+
# Fail on all warnings to avoid broken references
|
|
23
|
+
# fail_on_warning: true
|
|
24
|
+
|
|
25
|
+
# Optionally build your docs in additional formats such as PDF and ePub
|
|
26
|
+
formats:
|
|
27
|
+
- pdf
|
|
28
|
+
# - epub
|
|
29
|
+
|
|
30
|
+
# Optional but recommended, declare the Python requirements required
|
|
31
|
+
# to build your documentation
|
|
32
|
+
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
|
|
33
|
+
python:
|
|
34
|
+
install:
|
|
35
|
+
- requirements: docs/requirements.txt
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Contributing to Inscriptis
|
|
2
|
+
|
|
3
|
+
First off, thank you for considering contributing to inscriptis.
|
|
4
|
+
There are many ways you can contribute to the project, and these guidelines aim to support you in doing so.
|
|
5
|
+
|
|
6
|
+
1. [Reporting bugs and seeking support](#reporting-bugs-and-seeking-support)
|
|
7
|
+
2. [Suggesting enhancements](#suggesting-enhancements)
|
|
8
|
+
3. [Pull requests](#pull-requests) (contributing code)
|
|
9
|
+
4. [Python style guide](#python-style-guide)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
## Reporting bugs and seeking support
|
|
13
|
+
|
|
14
|
+
Bugs and support requests are tracked as GitHub issues.
|
|
15
|
+
|
|
16
|
+
To create an effective and high quality ticket, please include the following information in your
|
|
17
|
+
ticket:
|
|
18
|
+
|
|
19
|
+
1. **Use a clear and descriptive title** for the issue to identify the problem. This also helps other users to quickly locate bug reports that affect them.
|
|
20
|
+
2. **Describe the exact steps necessary for reproducing the problem** including at least information on
|
|
21
|
+
- the affected URL
|
|
22
|
+
- the command line parameters or function arguments you used
|
|
23
|
+
3. What would have been the **expected behavior**?
|
|
24
|
+
4. Describe the **observed behavior**.
|
|
25
|
+
5. Provide any additional information which might be helpful in reproducing and/or fixing this issue.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## Suggesting enhancements
|
|
29
|
+
|
|
30
|
+
Enhancements are also tracked as GitHub issues and should contain the following information:
|
|
31
|
+
|
|
32
|
+
1. **A clear and descriptive title** helps other people to identify enhancements they like, so that they can also add their thoughts and suggestions.
|
|
33
|
+
2. **Provide a step-by-step description** of the suggested enhancement.
|
|
34
|
+
3. **Describe the current behavior** and **explain which behavior you expected to see instead** and why.
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
## Pull requests
|
|
38
|
+
|
|
39
|
+
1. Ensure that your code complies with our [Python style guide](#python-style-guide).
|
|
40
|
+
2. Write a unit test that covers your new code and put it into the `./tests` directory.
|
|
41
|
+
3. Execute `tox .` in the project's root directory to ensure that your code passes the static code analysis, coding style guidelines and security checks.
|
|
42
|
+
4. In addition, please document any new API functions in the Inscriptis documentation.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
## Python style guide
|
|
46
|
+
|
|
47
|
+
Inscriptis code should comply with
|
|
48
|
+
- the [PEP8 Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/), and
|
|
49
|
+
- the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html)
|
|
50
|
+
|
|
51
|
+
Please also ensure that
|
|
52
|
+
1. functions are properly documented with docstrings that comply with the Google Python Style Guide, and
|
|
53
|
+
2. any new code is covered by unit tests.
|
|
54
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Stage 1 - Install build dependencies
|
|
3
|
+
#
|
|
4
|
+
FROM python:3.11-slim-bullseye AS builder
|
|
5
|
+
|
|
6
|
+
WORKDIR /inscriptis
|
|
7
|
+
RUN python -m venv .venv && .venv/bin/python -m pip install --upgrade pip
|
|
8
|
+
RUN .venv/bin/pip install --no-cache-dir inscriptis[web-service] && \
|
|
9
|
+
find /inscriptis/.venv \( -type d -a -name test -o -name tests \) -o \( -type f -a -name '*.pyc' -o -name '*.pyo' \) -exec rm -rf '{}' \+
|
|
10
|
+
|
|
11
|
+
#
|
|
12
|
+
# Stage 2 - Copy only necessary files to the runner stage
|
|
13
|
+
#
|
|
14
|
+
FROM python:3.11-slim-bullseye
|
|
15
|
+
LABEL maintainer="albert@weichselbraun.net"
|
|
16
|
+
|
|
17
|
+
# Note: only copy the /inscriptis directory prepared in the builder stage, to prevent bloating the image with
|
|
18
|
+
# irrelevant files from the project directory.
|
|
19
|
+
WORKDIR /inscriptis
|
|
20
|
+
COPY --from=builder /inscriptis /inscriptis
|
|
21
|
+
|
|
22
|
+
ENV PATH="/inscriptis/.venv/bin:$PATH"
|
|
23
|
+
CMD ["uvicorn", "inscriptis.service.web:app", "--port=5000", "--host=0.0.0.0"]
|
|
24
|
+
EXPOSE 5000
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: inscriptis
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.7.0
|
|
4
4
|
Summary: inscriptis - HTML to text converter.
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
Project-URL: Homepage, https://github.com/weblyzard/inscriptis
|
|
6
|
+
Project-URL: Repository, https://github.com/weblyzard/inscriptis
|
|
7
|
+
Project-URL: Documentation, https://inscriptis.readthedocs.io/en/latest/
|
|
8
|
+
Author-email: Albert Weichselbraun <albert.weichselbraun@fhgr.ch>, Fabian Odoni <fabian.odoni@fhgr.ch>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
License-File: AUTHORS
|
|
11
|
+
License-File: LICENSE
|
|
7
12
|
Keywords: HTML,converter,text
|
|
8
|
-
Author: Albert Weichselbraun
|
|
9
|
-
Author-email: albert.weichselbraun@fhgr.ch
|
|
10
|
-
Requires-Python: >=3.9,<4.0
|
|
11
13
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
14
|
Classifier: Intended Audience :: Developers
|
|
13
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
15
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
21
|
Classifier: Topic :: Text Processing
|
|
21
22
|
Classifier: Topic :: Text Processing :: Markup :: HTML
|
|
22
23
|
Classifier: Topic :: Utilities
|
|
24
|
+
Requires-Python: <3.15,>=3.10
|
|
25
|
+
Requires-Dist: lxml<6.0.0,>=5.4.0
|
|
26
|
+
Requires-Dist: requests<3.0.0,>=2.32.3
|
|
23
27
|
Provides-Extra: web-service
|
|
24
|
-
Requires-Dist: fastapi
|
|
25
|
-
Requires-Dist:
|
|
26
|
-
Requires-Dist: requests (>=2.32.2)
|
|
27
|
-
Requires-Dist: uvicorn (>=0.34.0,<0.35.0) ; extra == "web-service"
|
|
28
|
-
Project-URL: Documentation, https://inscriptis.readthedocs.io/en
|
|
29
|
-
Project-URL: Repository, https://github.com/weblyzard/inscriptis
|
|
28
|
+
Requires-Dist: fastapi<1.0.0,>=0.115.11; extra == 'web-service'
|
|
29
|
+
Requires-Dist: uvicorn<1.0.0,>=0.34.0; extra == 'web-service'
|
|
30
30
|
Description-Content-Type: text/x-rst
|
|
31
31
|
|
|
32
32
|
==================================================================================
|
|
@@ -37,10 +37,6 @@ inscriptis -- HTML to text conversion library, command line client and Web servi
|
|
|
37
37
|
:target: https://badge.fury.io/py/inscriptis
|
|
38
38
|
:alt: Supported python versions
|
|
39
39
|
|
|
40
|
-
.. image:: https://api.codeclimate.com/v1/badges/f8ed73f8a764f2bc4eba/maintainability
|
|
41
|
-
:target: https://codeclimate.com/github/weblyzard/inscriptis/maintainability
|
|
42
|
-
:alt: Maintainability
|
|
43
|
-
|
|
44
40
|
.. image:: https://codecov.io/gh/weblyzard/inscriptis/branch/master/graph/badge.svg
|
|
45
41
|
:target: https://codecov.io/gh/weblyzard/inscriptis/
|
|
46
42
|
:alt: Coverage
|
|
@@ -523,41 +519,112 @@ be used within a program:
|
|
|
523
519
|
print("Text:", output['text'])
|
|
524
520
|
print("Annotations:", output['label'])
|
|
525
521
|
|
|
526
|
-
Fine
|
|
527
|
-
|
|
522
|
+
Fine-tuning the HTML rendering
|
|
523
|
+
------------------------------
|
|
528
524
|
|
|
529
|
-
|
|
525
|
+
Inscriptis provides the ``ParserConfig`` class to fine-tune the HTML rendering
|
|
526
|
+
(`see documentation <https://inscriptis.readthedocs.io/en/latest/api.html#inscriptis.model.config.ParserConfig>`_).
|
|
530
527
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
``<div>`` and ``<span>`` that do not provide indentation in their standard
|
|
534
|
-
definition. This strategy is the default in ``inscript`` and many other
|
|
535
|
-
tools such as Lynx. If you do not want extended indentation you can use the
|
|
536
|
-
parameter ``indentation='standard'`` instead.
|
|
528
|
+
It allows modifying the interpretation of HTML-tags and setting parameters that control the rendering of anchors,
|
|
529
|
+
captions, images and links.
|
|
537
530
|
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
below:
|
|
531
|
+
1. **Firefox-like whitespace handling:** Use the more standards-compliant `strict` CSS_PROFILE to render the page.
|
|
532
|
+
(``<div>`` and ``<span>`` do not add whitespaces in the `strict` profile. Many text-based browsers such
|
|
533
|
+
as Lynx and ``inscript`` add whitespace by default to reduce the likelihood of words getting glued together).
|
|
542
534
|
|
|
543
|
-
.. code-block:: python
|
|
535
|
+
.. code-block:: python
|
|
536
|
+
|
|
537
|
+
from lxml.html import fromstring
|
|
538
|
+
|
|
539
|
+
from inscriptis import Inscriptis
|
|
540
|
+
from inscriptis.css_profiles import CSS_PROFILES
|
|
541
|
+
from inscriptis.model.config import ParserConfig
|
|
542
|
+
|
|
543
|
+
# create a ParserConfig that uses the strict CSS rendering profile
|
|
544
|
+
css = CSS_PROFILES['strict']
|
|
545
|
+
config = ParserConfig(css=css)
|
|
546
|
+
|
|
547
|
+
html_tree = fromstring(html)
|
|
548
|
+
parser = Inscriptis(html_tree, config)
|
|
549
|
+
text = parser.get_text()
|
|
550
|
+
|
|
551
|
+
2. **Firefox-like whitespace handling and fine-tuning of link handling:** Use the strict profile
|
|
552
|
+
together with inline links and anchor URLs.
|
|
553
|
+
|
|
554
|
+
.. code-block:: python
|
|
555
|
+
|
|
556
|
+
from lxml.html import fromstring
|
|
557
|
+
|
|
558
|
+
from inscriptis import Inscriptis
|
|
559
|
+
from inscriptis.css_profiles import CSS_PROFILES
|
|
560
|
+
from inscriptis.model.config import ParserConfig
|
|
561
|
+
|
|
562
|
+
# use the strict CSS rendering profile and fine-tune link handling.
|
|
563
|
+
css = CSS_PROFILES['strict']
|
|
564
|
+
config = ParserConfig(css=css, display_links=True,
|
|
565
|
+
display_anchors=True)
|
|
566
|
+
|
|
567
|
+
html_tree = fromstring(html)
|
|
568
|
+
parser = Inscriptis(html_tree, config)
|
|
569
|
+
text = parser.get_text()
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
3. **Overwriting the default CSS definition:** inscriptis uses CSS definitions
|
|
573
|
+
that are maintained in ``inscriptis.css_profiles.CSS_PROFILES`` for
|
|
574
|
+
rendering HTML tags. You can override these definitions (and therefore
|
|
575
|
+
change the rendering) as outlined below:
|
|
576
|
+
|
|
577
|
+
.. code-block:: python
|
|
578
|
+
|
|
579
|
+
from lxml.html import fromstring
|
|
580
|
+
|
|
581
|
+
from inscriptis import Inscriptis
|
|
582
|
+
from inscriptis.css_profiles import CSS_PROFILES
|
|
583
|
+
from inscriptis.html_properties import Display
|
|
584
|
+
from inscriptis.model.config import ParserConfig
|
|
585
|
+
from inscriptis.model.html_element import HtmlElement
|
|
586
|
+
|
|
587
|
+
# Create a custom CSS based on the default style sheet and change the
|
|
588
|
+
# rendering of `div` and `span` elements.
|
|
589
|
+
css = CSS_PROFILES['strict'].copy()
|
|
590
|
+
css['div'] = HtmlElement(display=Display.block, padding=2)
|
|
591
|
+
css['span'] = HtmlElement(prefix=' ', suffix=' ')
|
|
592
|
+
|
|
593
|
+
html_tree = fromstring(html)
|
|
594
|
+
# create a parser using a custom css
|
|
595
|
+
config = ParserConfig(css=css)
|
|
596
|
+
parser = Inscriptis(html_tree, config)
|
|
597
|
+
text = parser.get_text()
|
|
598
|
+
|
|
599
|
+
4. **Ignore elements during parsing:**
|
|
600
|
+
Overwriting the default CSS profile also allows changing the rendering of selected elements.
|
|
601
|
+
The snippet below, for example, removes forms from the parsed text by setting the definition of the ``form`` tag to ``Display.none``.
|
|
602
|
+
|
|
603
|
+
.. code-block:: python
|
|
604
|
+
|
|
605
|
+
from inscriptis import get_text
|
|
606
|
+
from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
|
|
607
|
+
from inscriptis.html_properties import Display
|
|
608
|
+
from inscriptis.model.config import ParserConfig
|
|
609
|
+
|
|
610
|
+
# create a custom CSS based on the default style sheet and change the
|
|
611
|
+
# rendering of `form` elements
|
|
612
|
+
css = CSS_PROFILES['strict'].copy()
|
|
613
|
+
css['form'] = HtmlElement(display=Display.none)
|
|
614
|
+
|
|
615
|
+
# create a parser configuration using a custom css
|
|
616
|
+
html = """First line.
|
|
617
|
+
<form>
|
|
618
|
+
User data
|
|
619
|
+
<label for="name">Name:</label><br>
|
|
620
|
+
<input type="text" id="name" name="name"><br>
|
|
621
|
+
<label for="pass">Password:</label><br>
|
|
622
|
+
<input type="hidden" id="pass" name="pass">
|
|
623
|
+
</form>"""
|
|
624
|
+
config = ParserConfig(css=css)
|
|
625
|
+
text = get_text(html, config)
|
|
626
|
+
print(text)
|
|
544
627
|
|
|
545
|
-
from lxml.html import fromstring
|
|
546
|
-
from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
|
|
547
|
-
from inscriptis.html_properties import Display
|
|
548
|
-
from inscriptis.model.config import ParserConfig
|
|
549
|
-
|
|
550
|
-
# create a custom CSS based on the default style sheet and change the
|
|
551
|
-
# rendering of `div` and `span` elements
|
|
552
|
-
css = CSS_PROFILES['strict'].copy()
|
|
553
|
-
css['div'] = HtmlElement(display=Display.block, padding=2)
|
|
554
|
-
css['span'] = HtmlElement(prefix=' ', suffix=' ')
|
|
555
|
-
|
|
556
|
-
html_tree = fromstring(html)
|
|
557
|
-
# create a parser using a custom css
|
|
558
|
-
config = ParserConfig(css=css)
|
|
559
|
-
parser = Inscriptis(html_tree, config)
|
|
560
|
-
text = parser.get_text()
|
|
561
628
|
|
|
562
629
|
|
|
563
630
|
Custom HTML tag handling
|
|
@@ -601,55 +668,6 @@ The following code mitigates this problem on Unix systems by manually forcing lx
|
|
|
601
668
|
return libc.malloc_trim(0)
|
|
602
669
|
|
|
603
670
|
|
|
604
|
-
Examples
|
|
605
|
-
========
|
|
606
|
-
|
|
607
|
-
Strict indentation handling
|
|
608
|
-
---------------------------
|
|
609
|
-
|
|
610
|
-
The following example demonstrates modifying ``ParserConfig`` for strict indentation handling.
|
|
611
|
-
|
|
612
|
-
.. code-block:: python
|
|
613
|
-
|
|
614
|
-
from inscriptis import get_text
|
|
615
|
-
from inscriptis.css_profiles import CSS_PROFILES
|
|
616
|
-
from inscriptis.model.config import ParserConfig
|
|
617
|
-
|
|
618
|
-
config = ParserConfig(css=CSS_PROFILES['strict'].copy())
|
|
619
|
-
text = get_text('fi<span>r</span>st', config)
|
|
620
|
-
print(text)
|
|
621
|
-
|
|
622
|
-
Ignore elements during parsing
|
|
623
|
-
------------------------------
|
|
624
|
-
|
|
625
|
-
Overwriting the default CSS profile also allows changing the rendering of selected elements.
|
|
626
|
-
The snippet below, for example, removes forms from the parsed text by setting the definition of the ``form`` tag to ``Display.none``.
|
|
627
|
-
|
|
628
|
-
.. code-block:: python
|
|
629
|
-
|
|
630
|
-
from inscriptis import get_text
|
|
631
|
-
from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
|
|
632
|
-
from inscriptis.html_properties import Display
|
|
633
|
-
from inscriptis.model.config import ParserConfig
|
|
634
|
-
|
|
635
|
-
# create a custom CSS based on the default style sheet and change the
|
|
636
|
-
# rendering of `div` and `span` elements
|
|
637
|
-
css = CSS_PROFILES['strict'].copy()
|
|
638
|
-
css['form'] = HtmlElement(display=Display.none)
|
|
639
|
-
|
|
640
|
-
# create a parser configuration using a custom css
|
|
641
|
-
html = """First line.
|
|
642
|
-
<form>
|
|
643
|
-
User data
|
|
644
|
-
<label for="name">Name:</label><br>
|
|
645
|
-
<input type="text" id="name" name="name"><br>
|
|
646
|
-
<label for="pass">Password:</label><br>
|
|
647
|
-
<input type="hidden" id="pass" name="pass">
|
|
648
|
-
</form>"""
|
|
649
|
-
config = ParserConfig(css=css)
|
|
650
|
-
text = get_text(html, config)
|
|
651
|
-
print(text)
|
|
652
|
-
|
|
653
671
|
|
|
654
672
|
Citation
|
|
655
673
|
========
|
|
@@ -678,4 +696,3 @@ Changelog
|
|
|
678
696
|
A full list of changes can be found in the
|
|
679
697
|
`release notes <https://github.com/weblyzard/inscriptis/releases>`_.
|
|
680
698
|
|
|
681
|
-
|