distillery 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +4 -0
- data/lib/distillery/document.rb +20 -5
- data/lib/distillery/version.rb +1 -1
- data/spec/acceptance_spec.rb +5 -0
- data/spec/fixtures/bulgogi.html +992 -0
- data/spec/fixtures/forest_ham.html +1115 -0
- data/spec/lib/distillery/document_spec.rb +6 -0
- metadata +23 -21
@@ -263,6 +263,12 @@ module Distillery
|
|
263
263
|
::Nokogiri::HTML.fragment(doc.dup.distill!(:images => true)).css('img').should_not be_empty
|
264
264
|
end
|
265
265
|
|
266
|
+
it 'finds images when the image is wrapped by an unrelated element that would be cleaned' do
|
267
|
+
doc = Document.new(File.open('./spec/fixtures/forest_ham.html').read)
|
268
|
+
::Nokogiri::HTML.fragment(doc.dup.distill!).css('img').should be_empty
|
269
|
+
::Nokogiri::HTML.fragment(doc.dup.distill!(:images => true)).css('img').should_not be_empty
|
270
|
+
end
|
271
|
+
|
266
272
|
it 'works with a HTML document that has no winner' do
|
267
273
|
document_of('foo').distill!.should == 'foo'
|
268
274
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: distillery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-07-
|
13
|
-
default_executable:
|
12
|
+
date: 2011-07-31 00:00:00.000000000Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: nokogiri
|
17
|
-
requirement: &
|
16
|
+
requirement: &2152027420 !ruby/object:Gem::Requirement
|
18
17
|
none: false
|
19
18
|
requirements:
|
20
19
|
- - ! '>'
|
@@ -22,10 +21,10 @@ dependencies:
|
|
22
21
|
version: '1.0'
|
23
22
|
type: :runtime
|
24
23
|
prerelease: false
|
25
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152027420
|
26
25
|
- !ruby/object:Gem::Dependency
|
27
26
|
name: slop
|
28
|
-
requirement: &
|
27
|
+
requirement: &2152026920 !ruby/object:Gem::Requirement
|
29
28
|
none: false
|
30
29
|
requirements:
|
31
30
|
- - ! '>'
|
@@ -33,10 +32,10 @@ dependencies:
|
|
33
32
|
version: '1.0'
|
34
33
|
type: :runtime
|
35
34
|
prerelease: false
|
36
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152026920
|
37
36
|
- !ruby/object:Gem::Dependency
|
38
37
|
name: rspec
|
39
|
-
requirement: &
|
38
|
+
requirement: &2152026460 !ruby/object:Gem::Requirement
|
40
39
|
none: false
|
41
40
|
requirements:
|
42
41
|
- - ! '>'
|
@@ -44,10 +43,10 @@ dependencies:
|
|
44
43
|
version: '2.0'
|
45
44
|
type: :development
|
46
45
|
prerelease: false
|
47
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152026460
|
48
47
|
- !ruby/object:Gem::Dependency
|
49
48
|
name: guard
|
50
|
-
requirement: &
|
49
|
+
requirement: &2152026080 !ruby/object:Gem::Requirement
|
51
50
|
none: false
|
52
51
|
requirements:
|
53
52
|
- - ! '>='
|
@@ -55,10 +54,10 @@ dependencies:
|
|
55
54
|
version: '0'
|
56
55
|
type: :development
|
57
56
|
prerelease: false
|
58
|
-
version_requirements: *
|
57
|
+
version_requirements: *2152026080
|
59
58
|
- !ruby/object:Gem::Dependency
|
60
59
|
name: guard-rspec
|
61
|
-
requirement: &
|
60
|
+
requirement: &2152025620 !ruby/object:Gem::Requirement
|
62
61
|
none: false
|
63
62
|
requirements:
|
64
63
|
- - ! '>='
|
@@ -66,10 +65,10 @@ dependencies:
|
|
66
65
|
version: '0'
|
67
66
|
type: :development
|
68
67
|
prerelease: false
|
69
|
-
version_requirements: *
|
68
|
+
version_requirements: *2152025620
|
70
69
|
- !ruby/object:Gem::Dependency
|
71
70
|
name: ruby-debug19
|
72
|
-
requirement: &
|
71
|
+
requirement: &2152025200 !ruby/object:Gem::Requirement
|
73
72
|
none: false
|
74
73
|
requirements:
|
75
74
|
- - ! '>='
|
@@ -77,10 +76,10 @@ dependencies:
|
|
77
76
|
version: '0'
|
78
77
|
type: :development
|
79
78
|
prerelease: false
|
80
|
-
version_requirements: *
|
79
|
+
version_requirements: *2152025200
|
81
80
|
- !ruby/object:Gem::Dependency
|
82
81
|
name: rb-fsevent
|
83
|
-
requirement: &
|
82
|
+
requirement: &2152024780 !ruby/object:Gem::Requirement
|
84
83
|
none: false
|
85
84
|
requirements:
|
86
85
|
- - ! '>='
|
@@ -88,10 +87,10 @@ dependencies:
|
|
88
87
|
version: '0'
|
89
88
|
type: :development
|
90
89
|
prerelease: false
|
91
|
-
version_requirements: *
|
90
|
+
version_requirements: *2152024780
|
92
91
|
- !ruby/object:Gem::Dependency
|
93
92
|
name: growl
|
94
|
-
requirement: &
|
93
|
+
requirement: &2152024360 !ruby/object:Gem::Requirement
|
95
94
|
none: false
|
96
95
|
requirements:
|
97
96
|
- - ! '>='
|
@@ -99,7 +98,7 @@ dependencies:
|
|
99
98
|
version: '0'
|
100
99
|
type: :development
|
101
100
|
prerelease: false
|
102
|
-
version_requirements: *
|
101
|
+
version_requirements: *2152024360
|
103
102
|
description: Distillery extracts the "content" portion out of an HTML document. It
|
104
103
|
applies heuristics based on element type, location, class/id name and other attributes
|
105
104
|
to try and find the content part of the HTML document and return it.
|
@@ -128,8 +127,10 @@ files:
|
|
128
127
|
- spec/fixtures/baked_ziti.html
|
129
128
|
- spec/fixtures/beef_jerkey.html
|
130
129
|
- spec/fixtures/bourbon_balls.html
|
130
|
+
- spec/fixtures/bulgogi.html
|
131
131
|
- spec/fixtures/clams_and_linguini.html
|
132
132
|
- spec/fixtures/clouds_shining_moment.html
|
133
|
+
- spec/fixtures/forest_ham.html
|
133
134
|
- spec/fixtures/game_blog.html
|
134
135
|
- spec/fixtures/ginger_cookies.html
|
135
136
|
- spec/fixtures/js_this_keyword.html
|
@@ -140,7 +141,6 @@ files:
|
|
140
141
|
- spec/lib/distillery/document_spec.rb
|
141
142
|
- spec/lib/distillery_spec.rb
|
142
143
|
- spec/spec_helper.rb
|
143
|
-
has_rdoc: true
|
144
144
|
homepage: https://github.com/Fluxx/distillery
|
145
145
|
licenses: []
|
146
146
|
post_install_message:
|
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
161
|
version: '0'
|
162
162
|
requirements: []
|
163
163
|
rubyforge_project: distillery
|
164
|
-
rubygems_version: 1.6
|
164
|
+
rubygems_version: 1.8.6
|
165
165
|
signing_key:
|
166
166
|
specification_version: 3
|
167
167
|
summary: Extract the content portion of an HTML document.
|
@@ -172,8 +172,10 @@ test_files:
|
|
172
172
|
- spec/fixtures/baked_ziti.html
|
173
173
|
- spec/fixtures/beef_jerkey.html
|
174
174
|
- spec/fixtures/bourbon_balls.html
|
175
|
+
- spec/fixtures/bulgogi.html
|
175
176
|
- spec/fixtures/clams_and_linguini.html
|
176
177
|
- spec/fixtures/clouds_shining_moment.html
|
178
|
+
- spec/fixtures/forest_ham.html
|
177
179
|
- spec/fixtures/game_blog.html
|
178
180
|
- spec/fixtures/ginger_cookies.html
|
179
181
|
- spec/fixtures/js_this_keyword.html
|