deba 0.10.0 → 0.11.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/deba/definition_description.rb +3 -6
- data/lib/deba/definition_term.rb +3 -6
- data/lib/deba/document.rb +31 -7
- data/lib/deba/extractor.rb +33 -24
- data/lib/deba/heading.rb +2 -4
- data/lib/deba/list_item.rb +3 -6
- data/lib/deba/paragraph.rb +7 -6
- data/lib/deba/span.rb +9 -0
- data/lib/deba/stringifier.rb +5 -16
- data/lib/deba.rb +3 -9
- metadata +3 -5
- data/lib/deba/blockquote.rb +0 -5
- data/lib/deba/break.rb +0 -5
- data/lib/deba/text_runner.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64da76c5a1c04969f66f580d20e350c7bca13715
|
4
|
+
data.tar.gz: fa665cf07572193d1a3f6ffcfa976885bf50a9ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef43a8c834a219a8b4fa5ad78cc4a59f10b09ec637ca46574a2ef61d22cc879ac9da32fea5fbf904d396b6b95c9c64301c730a818914149d9e9db840776dc215
|
7
|
+
data.tar.gz: 0f06e42c7267719f135a4bbcd0e6d24565979b19c3ade21bf49e622fccb7bec3c7cfe332651dde6fcbb2158064e08f8a560b047a1f9c029a49398711d9678cb1
|
data/lib/deba/definition_description.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
class Deba::DefinitionDescription
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix, last)
|
2
|
+
def initialize(segments, last)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
@last = last
|
8
5
|
end
|
9
6
|
|
10
|
-
def
|
11
|
-
|
7
|
+
def to_a
|
8
|
+
@segments + ["\n#{"\n" if @last}"]
|
12
9
|
end
|
13
10
|
end
|
data/lib/deba/definition_term.rb
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
class Deba::DefinitionTerm
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix)
|
2
|
+
def initialize(segments)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
end
|
8
5
|
|
9
|
-
def
|
10
|
-
|
6
|
+
def to_a
|
7
|
+
@segments + [":\n"]
|
11
8
|
end
|
12
9
|
end
|
data/lib/deba/document.rb
CHANGED
@@ -1,15 +1,39 @@
|
|
1
1
|
class Deba::Document
|
2
|
-
attr_reader :
|
2
|
+
attr_reader :content
|
3
3
|
|
4
|
-
def initialize
|
5
|
-
@
|
4
|
+
def initialize(extractor)
|
5
|
+
@extractor = extractor
|
6
|
+
@content = ""
|
7
|
+
|
8
|
+
start
|
9
|
+
end
|
10
|
+
|
11
|
+
def <<(segment)
|
12
|
+
@segments << segment
|
13
|
+
end
|
14
|
+
|
15
|
+
def break(*args)
|
16
|
+
finish
|
17
|
+
start(*args)
|
18
|
+
end
|
19
|
+
|
20
|
+
def finish
|
21
|
+
return unless present?
|
22
|
+
|
23
|
+
@args.unshift(@segments)
|
24
|
+
block = @block_type.new(*@args).to_a
|
25
|
+
block.unshift("> ") if @extractor.in_blockquote?
|
26
|
+
|
27
|
+
@content << Deba::Stringifier.new(block).stringify
|
6
28
|
end
|
7
29
|
|
8
|
-
def
|
9
|
-
@
|
30
|
+
def start(*args)
|
31
|
+
@segments = []
|
32
|
+
@block_type = args.shift
|
33
|
+
@args = args
|
10
34
|
end
|
11
35
|
|
12
|
-
def
|
13
|
-
@
|
36
|
+
def present?
|
37
|
+
@segments.any? { |segment| segment.is_a?(Deba::Span) && Deba::Utils.present?(segment.to_s) }
|
14
38
|
end
|
15
39
|
end
|
data/lib/deba/extractor.rb
CHANGED
@@ -12,12 +12,12 @@ class Deba::Extractor
|
|
12
12
|
|
13
13
|
def extract
|
14
14
|
@just_appended_br = false
|
15
|
-
@
|
16
|
-
@
|
15
|
+
@in_blockquote = false
|
16
|
+
@document = Deba::Document.new(self)
|
17
17
|
|
18
18
|
process(@node)
|
19
19
|
|
20
|
-
@document
|
20
|
+
@document.content.chomp("\n")
|
21
21
|
end
|
22
22
|
|
23
23
|
def process(node)
|
@@ -34,7 +34,7 @@ class Deba::Extractor
|
|
34
34
|
if @just_appended_br
|
35
35
|
@just_appended_br = false
|
36
36
|
|
37
|
-
@
|
37
|
+
@document.break(Deba::Paragraph)
|
38
38
|
|
39
39
|
return
|
40
40
|
else
|
@@ -43,11 +43,11 @@ class Deba::Extractor
|
|
43
43
|
elsif @just_appended_br
|
44
44
|
@just_appended_br = false
|
45
45
|
|
46
|
-
@
|
46
|
+
@document << "\n"
|
47
47
|
end
|
48
48
|
|
49
49
|
if node.text?
|
50
|
-
@
|
50
|
+
@document << Deba::Span.new(node.inner_text) if Deba::Utils.present?(node.inner_text)
|
51
51
|
|
52
52
|
return
|
53
53
|
end
|
@@ -55,55 +55,68 @@ class Deba::Extractor
|
|
55
55
|
if ENHANCERS.keys.flatten.include?(node_name)
|
56
56
|
ENHANCERS.each_pair do |tags, nsf_rep|
|
57
57
|
if tags.include?(node_name)
|
58
|
-
@
|
58
|
+
@document << nsf_rep
|
59
59
|
node.children.each { |n| process(n) }
|
60
|
-
@
|
60
|
+
@document << nsf_rep
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
64
|
return
|
65
65
|
end
|
66
66
|
|
67
|
+
if node_name == 'blockquote'
|
68
|
+
@in_blockquote = true
|
69
|
+
|
70
|
+
@document.break(Deba::Paragraph)
|
71
|
+
node.children.each { |n| process(n) }
|
72
|
+
@document.break(Deba::Paragraph)
|
73
|
+
|
74
|
+
@in_blockquote = false
|
75
|
+
|
76
|
+
return
|
77
|
+
end
|
78
|
+
|
67
79
|
if node_name == 'li'
|
68
80
|
last_item = node.xpath('count(following-sibling::li)').to_i == 0
|
69
81
|
index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
|
70
|
-
|
82
|
+
|
83
|
+
@document.break(Deba::ListItem, last_item, index)
|
71
84
|
node.children.each { |n| process(n) }
|
72
|
-
@
|
85
|
+
@document.break(Deba::Paragraph)
|
73
86
|
|
74
87
|
return
|
75
88
|
end
|
76
89
|
|
77
90
|
if node_name == 'dt'
|
78
|
-
@
|
91
|
+
@document.break(Deba::DefinitionTerm)
|
79
92
|
node.children.each { |n| process(n) }
|
80
|
-
@
|
93
|
+
@document.break(Deba::Paragraph)
|
81
94
|
|
82
95
|
return
|
83
96
|
end
|
84
97
|
|
85
98
|
if node_name == 'dd'
|
86
99
|
last_item = node.xpath('count(following-sibling::dd)').to_i == 0
|
87
|
-
@
|
100
|
+
@document.break(Deba::DefinitionDescription, last_item)
|
88
101
|
node.children.each { |n| process(n) }
|
89
|
-
@
|
102
|
+
@document.break(Deba::Paragraph)
|
90
103
|
|
91
104
|
return
|
92
105
|
end
|
93
106
|
|
94
107
|
#These tags terminate the current paragraph, if present, and start a new paragraph
|
95
108
|
if BLOCK_INITIATING_TAGS.include?(node_name)
|
96
|
-
@
|
109
|
+
@document.break(Deba::Paragraph)
|
97
110
|
node.children.each { |n| process(n) }
|
98
|
-
@
|
111
|
+
@document.break(Deba::Paragraph)
|
99
112
|
|
100
113
|
return
|
101
114
|
end
|
102
115
|
|
103
116
|
if HEADING_TAGS.include?(node_name)
|
104
|
-
@
|
117
|
+
@document.break(Deba::Heading, node_name[1..-1].to_i)
|
105
118
|
node.children.each { |n| process(n) }
|
106
|
-
@
|
119
|
+
@document.break(Deba::Paragraph)
|
107
120
|
|
108
121
|
return
|
109
122
|
end
|
@@ -112,11 +125,7 @@ class Deba::Extractor
|
|
112
125
|
node.children.each { |n| process(n) }
|
113
126
|
end
|
114
127
|
|
115
|
-
def
|
116
|
-
|
117
|
-
Deba::Blockquote.new
|
118
|
-
else
|
119
|
-
nil
|
120
|
-
end
|
128
|
+
def in_blockquote?
|
129
|
+
@in_blockquote
|
121
130
|
end
|
122
131
|
end
|
data/lib/deba/heading.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
class Deba::Heading
|
2
|
-
attr_reader :segments, :level
|
3
|
-
|
4
2
|
def initialize(segments, level)
|
5
3
|
@segments = segments
|
6
4
|
@level = level
|
7
5
|
end
|
8
6
|
|
9
|
-
def
|
10
|
-
"#
|
7
|
+
def to_a
|
8
|
+
["#" * @level] + @segments + ["\n\n"]
|
11
9
|
end
|
12
10
|
end
|
data/lib/deba/list_item.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
1
|
class Deba::ListItem
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix, last, index)
|
2
|
+
def initialize(segments, last, index)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
@last = last
|
8
5
|
@index = index
|
9
6
|
end
|
10
7
|
|
11
|
-
def
|
12
|
-
|
8
|
+
def to_a
|
9
|
+
[prefix] + @segments + ["\n#{"\n" if @last}"]
|
13
10
|
end
|
14
11
|
|
15
12
|
def prefix
|
data/lib/deba/paragraph.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
class Deba::Paragraph
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix)
|
2
|
+
def initialize(segments)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
end
|
8
5
|
|
9
|
-
def
|
10
|
-
|
6
|
+
def always?
|
7
|
+
false
|
8
|
+
end
|
9
|
+
|
10
|
+
def to_a
|
11
|
+
@segments + ["\n\n"]
|
11
12
|
end
|
12
13
|
end
|
data/lib/deba/span.rb
ADDED
data/lib/deba/stringifier.rb
CHANGED
@@ -1,28 +1,17 @@
|
|
1
1
|
class Deba::Stringifier
|
2
|
-
def initialize(segments
|
2
|
+
def initialize(segments)
|
3
3
|
@segments = segments
|
4
|
-
@line_prefix = line_prefix
|
5
4
|
end
|
6
5
|
|
7
6
|
def stringify
|
8
|
-
prefix(chunkify)
|
9
|
-
end
|
10
|
-
|
11
|
-
def chunkify
|
12
7
|
chunks = @segments.chunk { |segment| segment.class }
|
13
8
|
|
14
9
|
chunks.map do |type, chunk_segments|
|
15
|
-
if type ==
|
16
|
-
Deba::Utils.normalise(chunk_segments.join)
|
17
|
-
|
18
|
-
chunk_segments.
|
10
|
+
if type == Deba::Span
|
11
|
+
Deba::Utils.normalise(chunk_segments.map { |s| s.to_s }.join)
|
12
|
+
else
|
13
|
+
chunk_segments.join
|
19
14
|
end
|
20
15
|
end.join
|
21
16
|
end
|
22
|
-
|
23
|
-
def prefix(text)
|
24
|
-
return text if @line_prefix.nil?
|
25
|
-
|
26
|
-
text.gsub(/^/, @line_prefix.to_s)
|
27
|
-
end
|
28
17
|
end
|
data/lib/deba.rb
CHANGED
@@ -1,28 +1,22 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.
|
4
|
+
VERSION = "0.11.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
8
8
|
require "deba/stringifier"
|
9
|
-
require "deba/
|
10
|
-
require "deba/break"
|
9
|
+
require "deba/span"
|
11
10
|
require "deba/heading"
|
12
11
|
require "deba/list_item"
|
13
|
-
require "deba/blockquote"
|
14
12
|
require "deba/definition_term"
|
15
13
|
require "deba/definition_description"
|
16
14
|
require "deba/paragraph"
|
17
|
-
require "deba/
|
15
|
+
require "deba/document"
|
18
16
|
require "deba/extractor"
|
19
17
|
|
20
18
|
module Deba
|
21
19
|
def self.extract(html, options = {})
|
22
|
-
document(html, options).to_s
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.document(html, options = {})
|
26
20
|
doc = html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri.HTML(html)
|
27
21
|
Deba::Extractor.new(doc, options).extract
|
28
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brenton "B-Train" Fletcher
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,8 +71,6 @@ files:
|
|
71
71
|
- deba.gemspec
|
72
72
|
- exe/deba
|
73
73
|
- lib/deba.rb
|
74
|
-
- lib/deba/blockquote.rb
|
75
|
-
- lib/deba/break.rb
|
76
74
|
- lib/deba/definition_description.rb
|
77
75
|
- lib/deba/definition_term.rb
|
78
76
|
- lib/deba/document.rb
|
@@ -80,8 +78,8 @@ files:
|
|
80
78
|
- lib/deba/heading.rb
|
81
79
|
- lib/deba/list_item.rb
|
82
80
|
- lib/deba/paragraph.rb
|
81
|
+
- lib/deba/span.rb
|
83
82
|
- lib/deba/stringifier.rb
|
84
|
-
- lib/deba/text_runner.rb
|
85
83
|
- lib/deba/utils.rb
|
86
84
|
homepage: http://example.com
|
87
85
|
licenses:
|
data/lib/deba/blockquote.rb
DELETED
data/lib/deba/break.rb
DELETED
data/lib/deba/text_runner.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
class Deba::TextRunner
|
2
|
-
def initialize(document)
|
3
|
-
@document = document
|
4
|
-
|
5
|
-
start
|
6
|
-
end
|
7
|
-
|
8
|
-
def <<(segment)
|
9
|
-
@segments << segment
|
10
|
-
end
|
11
|
-
|
12
|
-
def break(*args)
|
13
|
-
finish
|
14
|
-
start(*args)
|
15
|
-
end
|
16
|
-
|
17
|
-
def finish
|
18
|
-
return unless present?
|
19
|
-
|
20
|
-
@args.unshift(@segments)
|
21
|
-
@document << @block_type.new(*@args)
|
22
|
-
end
|
23
|
-
|
24
|
-
def start(*args)
|
25
|
-
@segments = []
|
26
|
-
@block_type = args.shift
|
27
|
-
@args = args
|
28
|
-
end
|
29
|
-
|
30
|
-
def present?
|
31
|
-
@segments.any? { |segment| segment.is_a?(String) && Deba::Utils.present?(segment) }
|
32
|
-
end
|
33
|
-
end
|