escapement 0.1.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/README.md +1 -7
- data/Rakefile +8 -0
- data/escapement.gemspec +1 -1
- data/lib/escapement/html.rb +19 -0
- data/lib/escapement/tag.rb +0 -1
- data/lib/escapement/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fa90e5289492fd35e77857b9caac7a9d70f2bee
|
4
|
+
data.tar.gz: e1786a0edbbb18ec4aa741cd762d73abdd6f20bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a11eca3bcb49edf065e4c5caec62dd61588e77febaea83cbbe757ac8e701ae544aaa1336e6294a69a92899c8e0b5aba4d87d592e543eae3f78f5a276b3ce7750
|
7
|
+
data.tar.gz: a82bf4073571a62cbcac6416680271e253d5d55af1624f9b916a27288d846d803e9434c48f04f8919655d7c49c522102bd9067d684d8548b7773ef3e4fe478a2
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -37,15 +37,9 @@ html.results
|
|
37
37
|
|
38
38
|
From a high level, Escapement uses [Nokogiri](https://github.com/sparklemotion/nokogiri) to recursively traverse the DOM tree. As it traverses, it keeps track of the current position of the node relative to the text content in order to determine entity position. There are no regular expression hacks involved.
|
39
39
|
|
40
|
-
## Development
|
41
|
-
|
42
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
43
|
-
|
44
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
45
|
-
|
46
40
|
## Contributing
|
47
41
|
|
48
|
-
1. Fork it ( https://github.com/
|
42
|
+
1. Fork it ( https://github.com/hodinkee/escapement/fork )
|
49
43
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
50
44
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
51
45
|
4. Push to the branch (`git push origin my-new-feature`)
|
data/Rakefile
CHANGED
data/escapement.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
|
23
23
|
spec.add_dependency "nokogiri", "~> 1.6"
|
24
24
|
|
25
|
-
spec.add_development_dependency "bundler"
|
25
|
+
spec.add_development_dependency "bundler"
|
26
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
27
27
|
spec.add_development_dependency "rspec", "~> 3"
|
28
28
|
end
|
data/lib/escapement/html.rb
CHANGED
@@ -13,8 +13,27 @@ module Escapement
|
|
13
13
|
|
14
14
|
# Extracts all of the entities for each paragraph/block.
|
15
15
|
def extract!
|
16
|
+
preprocess!
|
17
|
+
|
16
18
|
@blocks = doc.css('body').children.map { |child| Block.new(child).tap(&:process!) }
|
17
19
|
@results = @blocks.reject { |b| b.result.nil? }.map(&:result)
|
18
20
|
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
# Run a preprocess pass on the HTML in order to format certain entities
|
25
|
+
# before we start recording entity positions.
|
26
|
+
def preprocess!(node = @doc)
|
27
|
+
node.children.each do |child|
|
28
|
+
preprocess_node(child)
|
29
|
+
preprocess!(child)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def preprocess_node(node)
|
34
|
+
if node.name == 'br'
|
35
|
+
node.replace Nokogiri::XML::Text.new("\n", @doc)
|
36
|
+
end
|
37
|
+
end
|
19
38
|
end
|
20
39
|
end
|
data/lib/escapement/tag.rb
CHANGED
data/lib/escapement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: escapement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan LeFevre
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -77,6 +77,7 @@ extra_rdoc_files: []
|
|
77
77
|
files:
|
78
78
|
- ".gitignore"
|
79
79
|
- ".rspec"
|
80
|
+
- ".travis.yml"
|
80
81
|
- Gemfile
|
81
82
|
- LICENSE
|
82
83
|
- README.md
|
@@ -110,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
111
|
version: '0'
|
111
112
|
requirements: []
|
112
113
|
rubyforge_project:
|
113
|
-
rubygems_version: 2.
|
114
|
+
rubygems_version: 2.5.1
|
114
115
|
signing_key:
|
115
116
|
specification_version: 4
|
116
117
|
summary: Extract child entities from an HTML string.
|