deba 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/deba/blockquote.rb +5 -0
- data/lib/deba/extractor.rb +14 -6
- data/lib/deba/list_item.rb +5 -2
- data/lib/deba/paragraph.rb +3 -2
- data/lib/deba/stringifier.rb +12 -1
- data/lib/deba.rb +2 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5a06ceb1a911deadddfcd27e1af4fcd50516498
|
4
|
+
data.tar.gz: f72a8c85575d467632818f87baf7d66cd39099ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 12dadca99b049c5b89ca9a37c11990b1b150f2d59b1540e13da476a3152ea3c67c3a45400901318d3630bf2c7b668803030126828ead347636a0a33c6f231db9
|
7
|
+
data.tar.gz: b2c363d1c12cac1cfd79c05e6c545cdfc037942478b9009dc207633466bc1ab9d3de4e6f3e4a27ef2f051f3b192b2196690da5f2d315ff58b9a3952c87b6b529
|
data/lib/deba/extractor.rb
CHANGED
@@ -34,7 +34,7 @@ class Deba::Extractor
|
|
34
34
|
if @just_appended_br
|
35
35
|
@just_appended_br = false
|
36
36
|
|
37
|
-
@text_run.break(Deba::Paragraph)
|
37
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
38
38
|
|
39
39
|
return
|
40
40
|
else
|
@@ -67,18 +67,18 @@ class Deba::Extractor
|
|
67
67
|
if node_name == 'li'
|
68
68
|
last_item = node.xpath('count(following-sibling::li)').to_i == 0
|
69
69
|
index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
|
70
|
-
@text_run.break(Deba::ListItem, last_item, index)
|
70
|
+
@text_run.break(Deba::ListItem, line_prefix(node), last_item, index)
|
71
71
|
node.children.each { |n| process(n) }
|
72
|
-
@text_run.break(Deba::Paragraph)
|
72
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
73
73
|
|
74
74
|
return
|
75
75
|
end
|
76
76
|
|
77
77
|
#These tags terminate the current paragraph, if present, and start a new paragraph
|
78
78
|
if BLOCK_INITIATING_TAGS.include?(node_name)
|
79
|
-
@text_run.break(Deba::Paragraph)
|
79
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
80
80
|
node.children.each { |n| process(n) }
|
81
|
-
@text_run.break(Deba::Paragraph)
|
81
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
82
82
|
|
83
83
|
return
|
84
84
|
end
|
@@ -86,7 +86,7 @@ class Deba::Extractor
|
|
86
86
|
if HEADING_TAGS.include?(node_name)
|
87
87
|
@text_run.break(Deba::Heading, node_name[1..-1].to_i)
|
88
88
|
node.children.each { |n| process(n) }
|
89
|
-
@text_run.break(Deba::Paragraph)
|
89
|
+
@text_run.break(Deba::Paragraph, line_prefix(node))
|
90
90
|
|
91
91
|
return
|
92
92
|
end
|
@@ -94,4 +94,12 @@ class Deba::Extractor
|
|
94
94
|
#Pretend that the children of this node were siblings of this node (move them one level up the tree)
|
95
95
|
node.children.each { |n| process(n) }
|
96
96
|
end
|
97
|
+
|
98
|
+
def line_prefix(node)
|
99
|
+
if node.xpath('boolean(ancestor::blockquote)')
|
100
|
+
Deba::Blockquote.new
|
101
|
+
else
|
102
|
+
nil
|
103
|
+
end
|
104
|
+
end
|
97
105
|
end
|
data/lib/deba/list_item.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
class Deba::ListItem
|
2
2
|
attr_reader :segments
|
3
3
|
|
4
|
-
def initialize(segments, last, index)
|
4
|
+
def initialize(segments, line_prefix, last, index)
|
5
5
|
@segments = segments
|
6
|
+
@line_prefix = line_prefix
|
6
7
|
@last = last
|
7
8
|
@index = index
|
8
9
|
end
|
@@ -14,6 +15,8 @@ class Deba::ListItem
|
|
14
15
|
"#{@index}. "
|
15
16
|
end
|
16
17
|
|
17
|
-
|
18
|
+
@segments.unshift(prefix)
|
19
|
+
|
20
|
+
"#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n#{"\n" if @last}"
|
18
21
|
end
|
19
22
|
end
|
data/lib/deba/paragraph.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
class Deba::Paragraph
|
2
2
|
attr_reader :segments
|
3
3
|
|
4
|
-
def initialize(segments)
|
4
|
+
def initialize(segments, line_prefix)
|
5
5
|
@segments = segments
|
6
|
+
@line_prefix = line_prefix
|
6
7
|
end
|
7
8
|
|
8
9
|
def to_s
|
9
|
-
"#{Deba::Stringifier.new(@segments).stringify}\n\n"
|
10
|
+
"#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n\n"
|
10
11
|
end
|
11
12
|
end
|
data/lib/deba/stringifier.rb
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
class Deba::Stringifier
|
2
|
-
def initialize(segments)
|
2
|
+
def initialize(segments, line_prefix = nil)
|
3
3
|
@segments = segments
|
4
|
+
@line_prefix = line_prefix
|
4
5
|
end
|
5
6
|
|
6
7
|
def stringify
|
8
|
+
prefix(chunkify)
|
9
|
+
end
|
10
|
+
|
11
|
+
def chunkify
|
7
12
|
chunks = @segments.chunk { |segment| segment.class }
|
8
13
|
|
9
14
|
chunks.map do |type, chunk_segments|
|
@@ -14,4 +19,10 @@ class Deba::Stringifier
|
|
14
19
|
end
|
15
20
|
end.join
|
16
21
|
end
|
22
|
+
|
23
|
+
def prefix(text)
|
24
|
+
return text if @line_prefix.nil?
|
25
|
+
|
26
|
+
text.gsub(/^/, @line_prefix.to_s)
|
27
|
+
end
|
17
28
|
end
|
data/lib/deba.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.8.0"
|
4
|
+
VERSION = "0.9.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
@@ -10,6 +10,7 @@ require "deba/document"
|
|
10
10
|
require "deba/break"
|
11
11
|
require "deba/heading"
|
12
12
|
require "deba/list_item"
|
13
|
+
require "deba/blockquote"
|
13
14
|
require "deba/paragraph"
|
14
15
|
require "deba/text_runner"
|
15
16
|
require "deba/extractor"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brenton "B-Train" Fletcher
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,6 +71,7 @@ files:
|
|
71
71
|
- deba.gemspec
|
72
72
|
- exe/deba
|
73
73
|
- lib/deba.rb
|
74
|
+
- lib/deba/blockquote.rb
|
74
75
|
- lib/deba/break.rb
|
75
76
|
- lib/deba/document.rb
|
76
77
|
- lib/deba/extractor.rb
|
@@ -100,7 +101,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
101
|
version: '0'
|
101
102
|
requirements: []
|
102
103
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
104
|
+
rubygems_version: 2.4.5.1
|
104
105
|
signing_key:
|
105
106
|
specification_version: 4
|
106
107
|
summary: Fillet HTML using this Deba knife to extract the juicy text content
|