distillery 0.2.1 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +4 -0
- data/lib/distillery/document.rb +20 -5
- data/lib/distillery/version.rb +1 -1
- data/spec/acceptance_spec.rb +5 -0
- data/spec/fixtures/bulgogi.html +992 -0
- data/spec/fixtures/forest_ham.html +1115 -0
- data/spec/lib/distillery/document_spec.rb +6 -0
- metadata +23 -21
@@ -263,6 +263,12 @@ module Distillery
|
|
263
263
|
::Nokogiri::HTML.fragment(doc.dup.distill!(:images => true)).css('img').should_not be_empty
|
264
264
|
end
|
265
265
|
|
266
|
+
it 'finds images when the image is wrapped by an unrelated element that would be cleaned' do
|
267
|
+
doc = Document.new(File.open('./spec/fixtures/forest_ham.html').read)
|
268
|
+
::Nokogiri::HTML.fragment(doc.dup.distill!).css('img').should be_empty
|
269
|
+
::Nokogiri::HTML.fragment(doc.dup.distill!(:images => true)).css('img').should_not be_empty
|
270
|
+
end
|
271
|
+
|
266
272
|
it 'works with a HTML document that has no winner' do
|
267
273
|
document_of('foo').distill!.should == 'foo'
|
268
274
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: distillery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-07-
|
13
|
-
default_executable:
|
12
|
+
date: 2011-07-31 00:00:00.000000000Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: nokogiri
|
17
|
-
requirement: &
|
16
|
+
requirement: &2152027420 !ruby/object:Gem::Requirement
|
18
17
|
none: false
|
19
18
|
requirements:
|
20
19
|
- - ! '>'
|
@@ -22,10 +21,10 @@ dependencies:
|
|
22
21
|
version: '1.0'
|
23
22
|
type: :runtime
|
24
23
|
prerelease: false
|
25
|
-
version_requirements: *
|
24
|
+
version_requirements: *2152027420
|
26
25
|
- !ruby/object:Gem::Dependency
|
27
26
|
name: slop
|
28
|
-
requirement: &
|
27
|
+
requirement: &2152026920 !ruby/object:Gem::Requirement
|
29
28
|
none: false
|
30
29
|
requirements:
|
31
30
|
- - ! '>'
|
@@ -33,10 +32,10 @@ dependencies:
|
|
33
32
|
version: '1.0'
|
34
33
|
type: :runtime
|
35
34
|
prerelease: false
|
36
|
-
version_requirements: *
|
35
|
+
version_requirements: *2152026920
|
37
36
|
- !ruby/object:Gem::Dependency
|
38
37
|
name: rspec
|
39
|
-
requirement: &
|
38
|
+
requirement: &2152026460 !ruby/object:Gem::Requirement
|
40
39
|
none: false
|
41
40
|
requirements:
|
42
41
|
- - ! '>'
|
@@ -44,10 +43,10 @@ dependencies:
|
|
44
43
|
version: '2.0'
|
45
44
|
type: :development
|
46
45
|
prerelease: false
|
47
|
-
version_requirements: *
|
46
|
+
version_requirements: *2152026460
|
48
47
|
- !ruby/object:Gem::Dependency
|
49
48
|
name: guard
|
50
|
-
requirement: &
|
49
|
+
requirement: &2152026080 !ruby/object:Gem::Requirement
|
51
50
|
none: false
|
52
51
|
requirements:
|
53
52
|
- - ! '>='
|
@@ -55,10 +54,10 @@ dependencies:
|
|
55
54
|
version: '0'
|
56
55
|
type: :development
|
57
56
|
prerelease: false
|
58
|
-
version_requirements: *
|
57
|
+
version_requirements: *2152026080
|
59
58
|
- !ruby/object:Gem::Dependency
|
60
59
|
name: guard-rspec
|
61
|
-
requirement: &
|
60
|
+
requirement: &2152025620 !ruby/object:Gem::Requirement
|
62
61
|
none: false
|
63
62
|
requirements:
|
64
63
|
- - ! '>='
|
@@ -66,10 +65,10 @@ dependencies:
|
|
66
65
|
version: '0'
|
67
66
|
type: :development
|
68
67
|
prerelease: false
|
69
|
-
version_requirements: *
|
68
|
+
version_requirements: *2152025620
|
70
69
|
- !ruby/object:Gem::Dependency
|
71
70
|
name: ruby-debug19
|
72
|
-
requirement: &
|
71
|
+
requirement: &2152025200 !ruby/object:Gem::Requirement
|
73
72
|
none: false
|
74
73
|
requirements:
|
75
74
|
- - ! '>='
|
@@ -77,10 +76,10 @@ dependencies:
|
|
77
76
|
version: '0'
|
78
77
|
type: :development
|
79
78
|
prerelease: false
|
80
|
-
version_requirements: *
|
79
|
+
version_requirements: *2152025200
|
81
80
|
- !ruby/object:Gem::Dependency
|
82
81
|
name: rb-fsevent
|
83
|
-
requirement: &
|
82
|
+
requirement: &2152024780 !ruby/object:Gem::Requirement
|
84
83
|
none: false
|
85
84
|
requirements:
|
86
85
|
- - ! '>='
|
@@ -88,10 +87,10 @@ dependencies:
|
|
88
87
|
version: '0'
|
89
88
|
type: :development
|
90
89
|
prerelease: false
|
91
|
-
version_requirements: *
|
90
|
+
version_requirements: *2152024780
|
92
91
|
- !ruby/object:Gem::Dependency
|
93
92
|
name: growl
|
94
|
-
requirement: &
|
93
|
+
requirement: &2152024360 !ruby/object:Gem::Requirement
|
95
94
|
none: false
|
96
95
|
requirements:
|
97
96
|
- - ! '>='
|
@@ -99,7 +98,7 @@ dependencies:
|
|
99
98
|
version: '0'
|
100
99
|
type: :development
|
101
100
|
prerelease: false
|
102
|
-
version_requirements: *
|
101
|
+
version_requirements: *2152024360
|
103
102
|
description: Distillery extracts the "content" portion out of an HTML document. It
|
104
103
|
applies heuristics based on element type, location, class/id name and other attributes
|
105
104
|
to try and find the content part of the HTML document and return it.
|
@@ -128,8 +127,10 @@ files:
|
|
128
127
|
- spec/fixtures/baked_ziti.html
|
129
128
|
- spec/fixtures/beef_jerkey.html
|
130
129
|
- spec/fixtures/bourbon_balls.html
|
130
|
+
- spec/fixtures/bulgogi.html
|
131
131
|
- spec/fixtures/clams_and_linguini.html
|
132
132
|
- spec/fixtures/clouds_shining_moment.html
|
133
|
+
- spec/fixtures/forest_ham.html
|
133
134
|
- spec/fixtures/game_blog.html
|
134
135
|
- spec/fixtures/ginger_cookies.html
|
135
136
|
- spec/fixtures/js_this_keyword.html
|
@@ -140,7 +141,6 @@ files:
|
|
140
141
|
- spec/lib/distillery/document_spec.rb
|
141
142
|
- spec/lib/distillery_spec.rb
|
142
143
|
- spec/spec_helper.rb
|
143
|
-
has_rdoc: true
|
144
144
|
homepage: https://github.com/Fluxx/distillery
|
145
145
|
licenses: []
|
146
146
|
post_install_message:
|
@@ -161,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
161
|
version: '0'
|
162
162
|
requirements: []
|
163
163
|
rubyforge_project: distillery
|
164
|
-
rubygems_version: 1.6
|
164
|
+
rubygems_version: 1.8.6
|
165
165
|
signing_key:
|
166
166
|
specification_version: 3
|
167
167
|
summary: Extract the content portion of an HTML document.
|
@@ -172,8 +172,10 @@ test_files:
|
|
172
172
|
- spec/fixtures/baked_ziti.html
|
173
173
|
- spec/fixtures/beef_jerkey.html
|
174
174
|
- spec/fixtures/bourbon_balls.html
|
175
|
+
- spec/fixtures/bulgogi.html
|
175
176
|
- spec/fixtures/clams_and_linguini.html
|
176
177
|
- spec/fixtures/clouds_shining_moment.html
|
178
|
+
- spec/fixtures/forest_ham.html
|
177
179
|
- spec/fixtures/game_blog.html
|
178
180
|
- spec/fixtures/ginger_cookies.html
|
179
181
|
- spec/fixtures/js_this_keyword.html
|