escapement 0.1.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -0
- data/README.md +1 -7
- data/Rakefile +8 -0
- data/escapement.gemspec +1 -1
- data/lib/escapement/html.rb +19 -0
- data/lib/escapement/tag.rb +0 -1
- data/lib/escapement/version.rb +1 -1
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fa90e5289492fd35e77857b9caac7a9d70f2bee
|
4
|
+
data.tar.gz: e1786a0edbbb18ec4aa741cd762d73abdd6f20bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a11eca3bcb49edf065e4c5caec62dd61588e77febaea83cbbe757ac8e701ae544aaa1336e6294a69a92899c8e0b5aba4d87d592e543eae3f78f5a276b3ce7750
|
7
|
+
data.tar.gz: a82bf4073571a62cbcac6416680271e253d5d55af1624f9b916a27288d846d803e9434c48f04f8919655d7c49c522102bd9067d684d8548b7773ef3e4fe478a2
|
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -37,15 +37,9 @@ html.results
|
|
37
37
|
|
38
38
|
From a high level, Escapement uses [Nokogiri](https://github.com/sparklemotion/nokogiri) to recursively traverse the DOM tree. As it traverses, it keeps track of the current position of the node relative to the text content in order to determine entity position. There are no regular expression hacks involved.
|
39
39
|
|
40
|
-
## Development
|
41
|
-
|
42
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
43
|
-
|
44
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
45
|
-
|
46
40
|
## Contributing
|
47
41
|
|
48
|
-
1. Fork it ( https://github.com/
|
42
|
+
1. Fork it ( https://github.com/hodinkee/escapement/fork )
|
49
43
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
50
44
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
51
45
|
4. Push to the branch (`git push origin my-new-feature`)
|
data/Rakefile
CHANGED
data/escapement.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
22
22
|
|
23
23
|
spec.add_dependency "nokogiri", "~> 1.6"
|
24
24
|
|
25
|
-
spec.add_development_dependency "bundler"
|
25
|
+
spec.add_development_dependency "bundler"
|
26
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
27
27
|
spec.add_development_dependency "rspec", "~> 3"
|
28
28
|
end
|
data/lib/escapement/html.rb
CHANGED
@@ -13,8 +13,27 @@ module Escapement
|
|
13
13
|
|
14
14
|
# Extracts all of the entities for each paragraph/block.
|
15
15
|
def extract!
|
16
|
+
preprocess!
|
17
|
+
|
16
18
|
@blocks = doc.css('body').children.map { |child| Block.new(child).tap(&:process!) }
|
17
19
|
@results = @blocks.reject { |b| b.result.nil? }.map(&:result)
|
18
20
|
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
# Run a preprocess pass on the HTML in order to format certain entities
|
25
|
+
# before we start recording entity positions.
|
26
|
+
def preprocess!(node = @doc)
|
27
|
+
node.children.each do |child|
|
28
|
+
preprocess_node(child)
|
29
|
+
preprocess!(child)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def preprocess_node(node)
|
34
|
+
if node.name == 'br'
|
35
|
+
node.replace Nokogiri::XML::Text.new("\n", @doc)
|
36
|
+
end
|
37
|
+
end
|
19
38
|
end
|
20
39
|
end
|
data/lib/escapement/tag.rb
CHANGED
data/lib/escapement/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: escapement
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan LeFevre
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -77,6 +77,7 @@ extra_rdoc_files: []
|
|
77
77
|
files:
|
78
78
|
- ".gitignore"
|
79
79
|
- ".rspec"
|
80
|
+
- ".travis.yml"
|
80
81
|
- Gemfile
|
81
82
|
- LICENSE
|
82
83
|
- README.md
|
@@ -110,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
111
|
version: '0'
|
111
112
|
requirements: []
|
112
113
|
rubyforge_project:
|
113
|
-
rubygems_version: 2.
|
114
|
+
rubygems_version: 2.5.1
|
114
115
|
signing_key:
|
115
116
|
specification_version: 4
|
116
117
|
summary: Extract child entities from an HTML string.
|