plainhtml 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plainhtml-0.3.0/.github/workflows/main.yml +23 -0
- plainhtml-0.3.0/.gitignore +64 -0
- plainhtml-0.3.0/PKG-INFO +32 -0
- {plainhtml-0.2.2 → plainhtml-0.3.0}/README.md +7 -1
- plainhtml-0.3.0/pyproject.toml +23 -0
- plainhtml-0.3.0/tests/test_parser.py +107 -0
- plainhtml-0.3.0/tests/testcases/A Light in the Attic | Books to Scrape - Sandbox.html +361 -0
- plainhtml-0.3.0/tests/testcases/A Light in the Attic | Books to Scrape - Sandbox.txt +30 -0
- plainhtml-0.3.0/tests/testcases/IANA — IANA-managed Reserved Domains.html +233 -0
- plainhtml-0.3.0/tests/testcases/IANA — IANA-managed Reserved Domains.txt +105 -0
- plainhtml-0.3.0/tests/testcases/Scrapinghub Enterprise Solutions.html +3 -0
- plainhtml-0.3.0/tests/testcases/Scrapinghub Enterprise Solutions.txt +230 -0
- plainhtml-0.3.0/tests/testcases/Tutorial — Webstruct 0.6 documentation.html +590 -0
- plainhtml-0.3.0/tests/testcases/Tutorial — Webstruct 0.6 documentation.txt +214 -0
- plainhtml-0.3.0/tests/testcases/Webstruct — Webstruct 0.6 documentation.html +357 -0
- plainhtml-0.3.0/tests/testcases/Webstruct — Webstruct 0.6 documentation.txt +91 -0
- plainhtml-0.3.0/uv.lock +324 -0
- plainhtml-0.2.2/PKG-INFO +0 -31
- plainhtml-0.2.2/pyproject.toml +0 -21
- {plainhtml-0.2.2 → plainhtml-0.3.0}/LICENSE +0 -0
- {plainhtml-0.2.2 → plainhtml-0.3.0}/plainhtml/__init__.py +0 -0
- {plainhtml-0.2.2 → plainhtml-0.3.0}/plainhtml/core.py +0 -0
- {plainhtml-0.2.2 → plainhtml-0.3.0}/plainhtml/parser.py +0 -0
- {plainhtml-0.2.2 → plainhtml-0.3.0}/plainhtml/utils.py +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: CI & CD
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- master
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
|
|
12
|
+
steps:
|
|
13
|
+
- name: Check out repository
|
|
14
|
+
uses: actions/checkout@v6
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57
|
|
18
|
+
|
|
19
|
+
- name: Build wheels
|
|
20
|
+
run: uv build
|
|
21
|
+
|
|
22
|
+
- name: Publish wheels
|
|
23
|
+
run: uv publish --token ${{ secrets.PYPI_TOKEN }}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
env/
|
|
12
|
+
build/
|
|
13
|
+
develop-eggs/
|
|
14
|
+
dist/
|
|
15
|
+
downloads/
|
|
16
|
+
eggs/
|
|
17
|
+
.eggs/
|
|
18
|
+
lib/
|
|
19
|
+
lib64/
|
|
20
|
+
parts/
|
|
21
|
+
sdist/
|
|
22
|
+
var/
|
|
23
|
+
*.egg-info/
|
|
24
|
+
.installed.cfg
|
|
25
|
+
*.egg
|
|
26
|
+
|
|
27
|
+
# PyInstaller
|
|
28
|
+
# Usually these files are written by a python script from a template
|
|
29
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.coverage
|
|
41
|
+
.coverage.*
|
|
42
|
+
.cache
|
|
43
|
+
nosetests.xml
|
|
44
|
+
coverage.xml
|
|
45
|
+
*,cover
|
|
46
|
+
.hypothesis/
|
|
47
|
+
.pytest_cache
|
|
48
|
+
|
|
49
|
+
# Translations
|
|
50
|
+
*.mo
|
|
51
|
+
*.pot
|
|
52
|
+
|
|
53
|
+
# Django stuff:
|
|
54
|
+
*.log
|
|
55
|
+
|
|
56
|
+
# Sphinx documentation
|
|
57
|
+
docs/_build/
|
|
58
|
+
|
|
59
|
+
# PyBuilder
|
|
60
|
+
target/
|
|
61
|
+
|
|
62
|
+
.env
|
|
63
|
+
.vscode
|
|
64
|
+
.venv
|
plainhtml-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: plainhtml
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Extract plain text from HTML
|
|
5
|
+
Author-email: Severin Simmler <s.simmler@snapaddy.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: lxml[html-clean]==6.0.4
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# Extract plain text from HTML
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
$ uv add plainhtml
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For development in this repository:
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
$ uv sync --dev
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Example
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
>>> import plainhtml
|
|
29
|
+
>>> html = "<html><body><p>foo</p><p>bar</p></body></html>"
|
|
30
|
+
>>> plainhtml.extract_text(html)
|
|
31
|
+
'foo\n\nbar'
|
|
32
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "plainhtml"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Extract plain text from HTML"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
authors = [{ name = "Severin Simmler", email = "s.simmler@snapaddy.com" }]
|
|
8
|
+
dependencies = [
|
|
9
|
+
"lxml[html_clean]==6.0.4",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[dependency-groups]
|
|
13
|
+
dev = [
|
|
14
|
+
"ruff>=0.2.1",
|
|
15
|
+
"pytest>=8.0.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[tool.ruff.lint.per-file-ignores]
|
|
19
|
+
"__init__.py" = ["F401"]
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["hatchling>=1.24.0"]
|
|
23
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import plainhtml
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_extract_no_text_html():
|
|
5
|
+
html = (
|
|
6
|
+
'<!DOCTYPE html><html><body><p><video width="320" height="240" '
|
|
7
|
+
'controls><source src="movie.mp4" type="video/mp4"><source '
|
|
8
|
+
'src="movie.ogg" type="video/ogg"></video></p></body></html>'
|
|
9
|
+
)
|
|
10
|
+
assert plainhtml.extract(html) == ""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_extract():
|
|
14
|
+
html = "<html><style>.div {}</style>" "<body><p>Hello, world!</body></html>"
|
|
15
|
+
assert plainhtml.extract(html) == "Hello, world!"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_declared_encoding():
|
|
19
|
+
html = (
|
|
20
|
+
'<?xml version="1.0" encoding="utf-8" ?>'
|
|
21
|
+
"<html><style>.div {}</style>"
|
|
22
|
+
"<body>Hello, world!</p></body></html>"
|
|
23
|
+
)
|
|
24
|
+
assert plainhtml.extract(html) == "Hello, world!"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_empty():
|
|
28
|
+
assert plainhtml.extract("") == ""
|
|
29
|
+
assert plainhtml.extract(" ") == ""
|
|
30
|
+
assert plainhtml.extract(None) == ""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_inline_tags_whitespace():
|
|
34
|
+
html = "<span>field</span><span>value of</span><span></span>"
|
|
35
|
+
assert plainhtml.extract(html) == "field value of"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_nbsp():
|
|
39
|
+
html = "<h1>Foo Bar</h1>"
|
|
40
|
+
assert plainhtml.extract(html) == "Foo Bar"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_adjust_newline():
|
|
44
|
+
html = "<div>text 1</div><p><div>text 2</div></p>"
|
|
45
|
+
assert plainhtml.extract(html) == "text 1\n\ntext 2"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
def test_punct_whitespace():
|
|
51
|
+
html = '<div><span>field</span>, and more</div>'
|
|
52
|
+
assert plainhtml.extract(html) == "nice"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_punct_whitespace_preserved():
|
|
57
|
+
html = (u'<div><span>по</span><span>ле</span>, and , '
|
|
58
|
+
u'<span>more </span>!<span>now</div>a (<b>boo</b>)')
|
|
59
|
+
text = plainhtml.extract(html)
|
|
60
|
+
assert text == u'по ле, and , more ! now a (boo)'
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_guess_layout():
|
|
64
|
+
html = (u'<title> title </title><div>text_1.<p>text_2 text_3</p>'
|
|
65
|
+
'<p id="demo"></p><ul><li>text_4</li><li>text_5</li></ul>'
|
|
66
|
+
'<p>text_6<em>text_7</em>text_8</p>text_9</div>'
|
|
67
|
+
'<script>document.getElementById("demo").innerHTML = '
|
|
68
|
+
'"This should be skipped";</script> <p>...text_10</p>')
|
|
69
|
+
|
|
70
|
+
text = 'title text_1. text_2 text_3 text_4 text_5 text_6 text_7 ' \
|
|
71
|
+
'text_8 text_9 ...text_10'
|
|
72
|
+
assert plainhtml.extract(html, guess_punct_space=False, guess_layout=False) == text
|
|
73
|
+
|
|
74
|
+
text = ('title\n\ntext_1.\n\ntext_2 text_3\n\ntext_4\ntext_5'
|
|
75
|
+
'\n\ntext_6 text_7 text_8\n\ntext_9\n\n...text_10')
|
|
76
|
+
assert plainhtml.extract(html, guess_punct_space=False, guess_layout=True) == text
|
|
77
|
+
|
|
78
|
+
text = 'title text_1. text_2 text_3 text_4 text_5 text_6 text_7 ' \
|
|
79
|
+
'text_8 text_9...text_10'
|
|
80
|
+
assert plainhtml.extract(html, guess_punct_space=True, guess_layout=False) == text
|
|
81
|
+
|
|
82
|
+
text = 'title\n\ntext_1.\n\ntext_2 text_3\n\ntext_4\ntext_5\n\n' \
|
|
83
|
+
'text_6 text_7 text_8\n\ntext_9\n\n...text_10'
|
|
84
|
+
assert plainhtml.extract(html, guess_punct_space=True, guess_layout=True) == text
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_basic_newline():
|
|
88
|
+
html = u'<div>a</div><div>b</div>'
|
|
89
|
+
assert plainhtml.extract(html, guess_punct_space=False, guess_layout=False) == 'a b'
|
|
90
|
+
assert plainhtml.extract(html, guess_punct_space=False, guess_layout=True) == 'a\nb'
|
|
91
|
+
assert plainhtml.extract(html, guess_punct_space=True, guess_layout=False) == 'a b'
|
|
92
|
+
assert plainhtml.extract(html, guess_punct_space=True, guess_layout=True) == 'a\nb'
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_personalize_newlines_sets():
|
|
96
|
+
html = (u'<span><span>text<a>more</a>'
|
|
97
|
+
'</span>and more text <a> and some more</a> <a></a> </span>')
|
|
98
|
+
|
|
99
|
+
text = plainhtml.extract(html, guess_layout=True,
|
|
100
|
+
newline_tags=NEWLINE_TAGS | {'a'})
|
|
101
|
+
assert text == 'text\nmore\nand more text\nand some more'
|
|
102
|
+
|
|
103
|
+
text = plainhtml.extract(html, guess_layout=True,
|
|
104
|
+
double_newline_tags=DOUBLE_NEWLINE_TAGS | {'a'})
|
|
105
|
+
assert text == 'text\n\nmore\n\nand more text\n\nand some more'
|
|
106
|
+
|
|
107
|
+
"""
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
<!DOCTYPE html>
|
|
4
|
+
<!--[if lt IE 7]> <html lang="en-us" class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
|
|
5
|
+
<!--[if IE 7]> <html lang="en-us" class="no-js lt-ie9 lt-ie8"> <![endif]-->
|
|
6
|
+
<!--[if IE 8]> <html lang="en-us" class="no-js lt-ie9"> <![endif]-->
|
|
7
|
+
<!--[if gt IE 8]><!--> <html lang="en-us" class="no-js"> <!--<![endif]-->
|
|
8
|
+
<head>
|
|
9
|
+
<title>
|
|
10
|
+
A Light in the Attic | Books to Scrape - Sandbox
|
|
11
|
+
</title>
|
|
12
|
+
|
|
13
|
+
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
|
|
14
|
+
<meta name="created" content="24th Jun 2016 09:29" />
|
|
15
|
+
<meta name="description" content="
|
|
16
|
+
It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put you up there,And your cradle, too?Baby, I think someone down here'sGot it in for you. Shel, you never sounded so good. ...more
|
|
17
|
+
" />
|
|
18
|
+
<meta name="viewport" content="width=device-width" />
|
|
19
|
+
<meta name="robots" content="NOARCHIVE,NOCACHE" />
|
|
20
|
+
|
|
21
|
+
<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
|
|
22
|
+
<!--[if lt IE 9]>
|
|
23
|
+
<script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
|
24
|
+
<![endif]-->
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
<link rel="shortcut icon" href="../../static/oscar/favicon.ico" />
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
<link rel="stylesheet" type="text/css" href="../../static/oscar/css/styles.css" />
|
|
35
|
+
|
|
36
|
+
<link rel="stylesheet" href="../../static/oscar/js/bootstrap-datetimepicker/bootstrap-datetimepicker.css" />
|
|
37
|
+
<link rel="stylesheet" type="text/css" href="../../static/oscar/css/datetimepicker.css" />
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
</head>
|
|
51
|
+
|
|
52
|
+
<body id="default" class="default">
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
<header class="header container-fluid">
|
|
58
|
+
<div class="page_inner">
|
|
59
|
+
<div class="row">
|
|
60
|
+
<div class="col-sm-8 h1"><a href="../../index.html">Books to Scrape</a><small> We love being scraped!</small>
|
|
61
|
+
</div>
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
</div>
|
|
65
|
+
</div>
|
|
66
|
+
</header>
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
<div class="container-fluid page">
|
|
71
|
+
<div class="page_inner">
|
|
72
|
+
|
|
73
|
+
<ul class="breadcrumb">
|
|
74
|
+
<li>
|
|
75
|
+
<a href="../../index.html">Home</a>
|
|
76
|
+
</li>
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
<li>
|
|
80
|
+
<a href="../category/books_1/index.html">Books</a>
|
|
81
|
+
</li>
|
|
82
|
+
|
|
83
|
+
<li>
|
|
84
|
+
<a href="../category/books/poetry_23/index.html">Poetry</a>
|
|
85
|
+
</li>
|
|
86
|
+
|
|
87
|
+
<li class="active">A Light in the Attic</li>
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
</ul>
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
<div id="messages">
|
|
101
|
+
|
|
102
|
+
</div>
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
<div class="content">
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
<div id="promotions">
|
|
110
|
+
|
|
111
|
+
</div>
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
<div id="content_inner">
|
|
115
|
+
|
|
116
|
+
<article class="product_page"><!-- Start of product page -->
|
|
117
|
+
|
|
118
|
+
<div class="row">
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
<div class="col-sm-6">
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
<div id="product_gallery" class="carousel">
|
|
133
|
+
<div class="thumbnail">
|
|
134
|
+
<div class="carousel-inner">
|
|
135
|
+
<div class="item active">
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
<img src="../../media/cache/fe/72/fe72f0532301ec28892ae79a629a293c.jpg" alt="A Light in the Attic" />
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
</div>
|
|
142
|
+
</div>
|
|
143
|
+
</div>
|
|
144
|
+
</div>
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
</div>
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
<div class="col-sm-6 product_main">
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
<h1>A Light in the Attic</h1>
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
<p class="price_color">£51.77</p>
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
<p class="instock availability">
|
|
171
|
+
<i class="icon-ok"></i>
|
|
172
|
+
|
|
173
|
+
In stock (22 available)
|
|
174
|
+
|
|
175
|
+
</p>
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
<p class="star-rating Three">
|
|
185
|
+
<i class="icon-star"></i>
|
|
186
|
+
<i class="icon-star"></i>
|
|
187
|
+
<i class="icon-star"></i>
|
|
188
|
+
<i class="icon-star"></i>
|
|
189
|
+
<i class="icon-star"></i>
|
|
190
|
+
|
|
191
|
+
<!-- <small><a href="/catalogue/a-light-in-the-attic_1000/reviews/">
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
0 customer reviews
|
|
195
|
+
|
|
196
|
+
</a></small>
|
|
197
|
+
-->
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
<!--
|
|
201
|
+
<a id="write_review" href="/catalogue/a-light-in-the-attic_1000/reviews/add/#addreview" class="btn btn-success btn-sm">
|
|
202
|
+
Write a review
|
|
203
|
+
</a>
|
|
204
|
+
|
|
205
|
+
--></p>
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
<hr/>
|
|
210
|
+
|
|
211
|
+
<div class="alert alert-warning" role="alert"><strong>Warning!</strong> This is a demo website for web scraping purposes. Prices and ratings here were randomly assigned and have no real meaning.</div>
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
</div><!-- /col-sm-6 -->
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
</div><!-- /row -->
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
<div id="product_description" class="sub-header">
|
|
231
|
+
<h2>Product Description</h2>
|
|
232
|
+
</div>
|
|
233
|
+
<p>It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put you up there,And your cradle, too?Baby, I think someone down here'sGot it in for you. Shel, you never sounded so good. ...more</p>
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
<div class="sub-header">
|
|
239
|
+
<h2>Product Information</h2>
|
|
240
|
+
</div>
|
|
241
|
+
<table class="table table-striped">
|
|
242
|
+
|
|
243
|
+
<tr>
|
|
244
|
+
<th>UPC</th><td>a897fe39b1053632</td>
|
|
245
|
+
</tr>
|
|
246
|
+
|
|
247
|
+
<tr>
|
|
248
|
+
<th>Product Type</th><td>Books</td>
|
|
249
|
+
</tr>
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
<tr>
|
|
254
|
+
<th>Price (excl. tax)</th><td>£51.77</td>
|
|
255
|
+
</tr>
|
|
256
|
+
|
|
257
|
+
<tr>
|
|
258
|
+
<th>Price (incl. tax)</th><td>£51.77</td>
|
|
259
|
+
</tr>
|
|
260
|
+
<tr>
|
|
261
|
+
<th>Tax</th><td>£0.00</td>
|
|
262
|
+
</tr>
|
|
263
|
+
|
|
264
|
+
<tr>
|
|
265
|
+
<th>Availability</th>
|
|
266
|
+
<td>In stock (22 available)</td>
|
|
267
|
+
</tr>
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
<tr>
|
|
272
|
+
<th>Number of reviews</th>
|
|
273
|
+
<td>0</td>
|
|
274
|
+
</tr>
|
|
275
|
+
|
|
276
|
+
</table>
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
<section>
|
|
282
|
+
<div id="reviews" class="sub-header">
|
|
283
|
+
</div>
|
|
284
|
+
</section>
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
</article><!-- End of product page -->
|
|
301
|
+
</div>
|
|
302
|
+
</div>
|
|
303
|
+
</div>
|
|
304
|
+
</div>
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
<footer class="footer container-fluid">
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
</footer>
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
<!-- jQuery -->
|
|
319
|
+
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
|
|
320
|
+
<script>window.jQuery || document.write('<script src="../../static/oscar/js/jquery/jquery-1.9.1.min.js"><\/script>')</script>
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
<!-- Twitter Bootstrap -->
|
|
330
|
+
<script type="text/javascript" src="../../static/oscar/js/bootstrap3/bootstrap.min.js"></script>
|
|
331
|
+
<!-- Oscar -->
|
|
332
|
+
<script src="../../static/oscar/js/oscar/ui.js" type="text/javascript" charset="utf-8"></script>
|
|
333
|
+
|
|
334
|
+
<script src="../../static/oscar/js/bootstrap-datetimepicker/bootstrap-datetimepicker.js" type="text/javascript" charset="utf-8"></script>
|
|
335
|
+
<script src="../../static/oscar/js/bootstrap-datetimepicker/locales/bootstrap-datetimepicker.all.js" type="text/javascript" charset="utf-8"></script>
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
<script type="text/javascript">
|
|
349
|
+
$(function() {
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
oscar.init();
|
|
353
|
+
|
|
354
|
+
});
|
|
355
|
+
</script>
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
<!-- Version: N/A -->
|
|
359
|
+
|
|
360
|
+
</body>
|
|
361
|
+
</html>
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
A Light in the Attic | Books to Scrape - Sandbox
|
|
2
|
+
|
|
3
|
+
Books to Scrape We love being scraped!
|
|
4
|
+
|
|
5
|
+
Home
|
|
6
|
+
Books
|
|
7
|
+
Poetry
|
|
8
|
+
A Light in the Attic
|
|
9
|
+
|
|
10
|
+
A Light in the Attic
|
|
11
|
+
|
|
12
|
+
£51.77
|
|
13
|
+
|
|
14
|
+
In stock (22 available)
|
|
15
|
+
|
|
16
|
+
Warning! This is a demo website for web scraping purposes. Prices and ratings here were randomly assigned and have no real meaning.
|
|
17
|
+
|
|
18
|
+
Product Description
|
|
19
|
+
|
|
20
|
+
It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put you up there,And your cradle, too?Baby, I think someone down here'sGot it in for you. Shel, you never sounded so good. ...more
|
|
21
|
+
|
|
22
|
+
Product Information
|
|
23
|
+
|
|
24
|
+
UPC a897fe39b1053632
|
|
25
|
+
Product Type Books
|
|
26
|
+
Price (excl. tax) £51.77
|
|
27
|
+
Price (incl. tax) £51.77
|
|
28
|
+
Tax £0.00
|
|
29
|
+
Availability In stock (22 available)
|
|
30
|
+
Number of reviews 0
|