deba 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/deba/definition_description.rb +3 -6
- data/lib/deba/definition_term.rb +3 -6
- data/lib/deba/document.rb +31 -7
- data/lib/deba/extractor.rb +33 -24
- data/lib/deba/heading.rb +2 -4
- data/lib/deba/list_item.rb +3 -6
- data/lib/deba/paragraph.rb +7 -6
- data/lib/deba/span.rb +9 -0
- data/lib/deba/stringifier.rb +5 -16
- data/lib/deba.rb +3 -9
- metadata +3 -5
- data/lib/deba/blockquote.rb +0 -5
- data/lib/deba/break.rb +0 -5
- data/lib/deba/text_runner.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64da76c5a1c04969f66f580d20e350c7bca13715
|
4
|
+
data.tar.gz: fa665cf07572193d1a3f6ffcfa976885bf50a9ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef43a8c834a219a8b4fa5ad78cc4a59f10b09ec637ca46574a2ef61d22cc879ac9da32fea5fbf904d396b6b95c9c64301c730a818914149d9e9db840776dc215
|
7
|
+
data.tar.gz: 0f06e42c7267719f135a4bbcd0e6d24565979b19c3ade21bf49e622fccb7bec3c7cfe332651dde6fcbb2158064e08f8a560b047a1f9c029a49398711d9678cb1
|
@@ -1,13 +1,10 @@
|
|
1
1
|
class Deba::DefinitionDescription
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix, last)
|
2
|
+
def initialize(segments, last)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
@last = last
|
8
5
|
end
|
9
6
|
|
10
|
-
def
|
11
|
-
|
7
|
+
def to_a
|
8
|
+
@segments + ["\n#{"\n" if @last}"]
|
12
9
|
end
|
13
10
|
end
|
data/lib/deba/definition_term.rb
CHANGED
@@ -1,12 +1,9 @@
|
|
1
1
|
class Deba::DefinitionTerm
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix)
|
2
|
+
def initialize(segments)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
end
|
8
5
|
|
9
|
-
def
|
10
|
-
|
6
|
+
def to_a
|
7
|
+
@segments + [":\n"]
|
11
8
|
end
|
12
9
|
end
|
data/lib/deba/document.rb
CHANGED
@@ -1,15 +1,39 @@
|
|
1
1
|
class Deba::Document
|
2
|
-
attr_reader :
|
2
|
+
attr_reader :content
|
3
3
|
|
4
|
-
def initialize
|
5
|
-
@
|
4
|
+
def initialize(extractor)
|
5
|
+
@extractor = extractor
|
6
|
+
@content = ""
|
7
|
+
|
8
|
+
start
|
9
|
+
end
|
10
|
+
|
11
|
+
def <<(segment)
|
12
|
+
@segments << segment
|
13
|
+
end
|
14
|
+
|
15
|
+
def break(*args)
|
16
|
+
finish
|
17
|
+
start(*args)
|
18
|
+
end
|
19
|
+
|
20
|
+
def finish
|
21
|
+
return unless present?
|
22
|
+
|
23
|
+
@args.unshift(@segments)
|
24
|
+
block = @block_type.new(*@args).to_a
|
25
|
+
block.unshift("> ") if @extractor.in_blockquote?
|
26
|
+
|
27
|
+
@content << Deba::Stringifier.new(block).stringify
|
6
28
|
end
|
7
29
|
|
8
|
-
def
|
9
|
-
@
|
30
|
+
def start(*args)
|
31
|
+
@segments = []
|
32
|
+
@block_type = args.shift
|
33
|
+
@args = args
|
10
34
|
end
|
11
35
|
|
12
|
-
def
|
13
|
-
@
|
36
|
+
def present?
|
37
|
+
@segments.any? { |segment| segment.is_a?(Deba::Span) && Deba::Utils.present?(segment.to_s) }
|
14
38
|
end
|
15
39
|
end
|
data/lib/deba/extractor.rb
CHANGED
@@ -12,12 +12,12 @@ class Deba::Extractor
|
|
12
12
|
|
13
13
|
def extract
|
14
14
|
@just_appended_br = false
|
15
|
-
@
|
16
|
-
@
|
15
|
+
@in_blockquote = false
|
16
|
+
@document = Deba::Document.new(self)
|
17
17
|
|
18
18
|
process(@node)
|
19
19
|
|
20
|
-
@document
|
20
|
+
@document.content.chomp("\n")
|
21
21
|
end
|
22
22
|
|
23
23
|
def process(node)
|
@@ -34,7 +34,7 @@ class Deba::Extractor
|
|
34
34
|
if @just_appended_br
|
35
35
|
@just_appended_br = false
|
36
36
|
|
37
|
-
@
|
37
|
+
@document.break(Deba::Paragraph)
|
38
38
|
|
39
39
|
return
|
40
40
|
else
|
@@ -43,11 +43,11 @@ class Deba::Extractor
|
|
43
43
|
elsif @just_appended_br
|
44
44
|
@just_appended_br = false
|
45
45
|
|
46
|
-
@
|
46
|
+
@document << "\n"
|
47
47
|
end
|
48
48
|
|
49
49
|
if node.text?
|
50
|
-
@
|
50
|
+
@document << Deba::Span.new(node.inner_text) if Deba::Utils.present?(node.inner_text)
|
51
51
|
|
52
52
|
return
|
53
53
|
end
|
@@ -55,55 +55,68 @@ class Deba::Extractor
|
|
55
55
|
if ENHANCERS.keys.flatten.include?(node_name)
|
56
56
|
ENHANCERS.each_pair do |tags, nsf_rep|
|
57
57
|
if tags.include?(node_name)
|
58
|
-
@
|
58
|
+
@document << nsf_rep
|
59
59
|
node.children.each { |n| process(n) }
|
60
|
-
@
|
60
|
+
@document << nsf_rep
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
64
|
return
|
65
65
|
end
|
66
66
|
|
67
|
+
if node_name == 'blockquote'
|
68
|
+
@in_blockquote = true
|
69
|
+
|
70
|
+
@document.break(Deba::Paragraph)
|
71
|
+
node.children.each { |n| process(n) }
|
72
|
+
@document.break(Deba::Paragraph)
|
73
|
+
|
74
|
+
@in_blockquote = false
|
75
|
+
|
76
|
+
return
|
77
|
+
end
|
78
|
+
|
67
79
|
if node_name == 'li'
|
68
80
|
last_item = node.xpath('count(following-sibling::li)').to_i == 0
|
69
81
|
index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
|
70
|
-
|
82
|
+
|
83
|
+
@document.break(Deba::ListItem, last_item, index)
|
71
84
|
node.children.each { |n| process(n) }
|
72
|
-
@
|
85
|
+
@document.break(Deba::Paragraph)
|
73
86
|
|
74
87
|
return
|
75
88
|
end
|
76
89
|
|
77
90
|
if node_name == 'dt'
|
78
|
-
@
|
91
|
+
@document.break(Deba::DefinitionTerm)
|
79
92
|
node.children.each { |n| process(n) }
|
80
|
-
@
|
93
|
+
@document.break(Deba::Paragraph)
|
81
94
|
|
82
95
|
return
|
83
96
|
end
|
84
97
|
|
85
98
|
if node_name == 'dd'
|
86
99
|
last_item = node.xpath('count(following-sibling::dd)').to_i == 0
|
87
|
-
@
|
100
|
+
@document.break(Deba::DefinitionDescription, last_item)
|
88
101
|
node.children.each { |n| process(n) }
|
89
|
-
@
|
102
|
+
@document.break(Deba::Paragraph)
|
90
103
|
|
91
104
|
return
|
92
105
|
end
|
93
106
|
|
94
107
|
#These tags terminate the current paragraph, if present, and start a new paragraph
|
95
108
|
if BLOCK_INITIATING_TAGS.include?(node_name)
|
96
|
-
@
|
109
|
+
@document.break(Deba::Paragraph)
|
97
110
|
node.children.each { |n| process(n) }
|
98
|
-
@
|
111
|
+
@document.break(Deba::Paragraph)
|
99
112
|
|
100
113
|
return
|
101
114
|
end
|
102
115
|
|
103
116
|
if HEADING_TAGS.include?(node_name)
|
104
|
-
@
|
117
|
+
@document.break(Deba::Heading, node_name[1..-1].to_i)
|
105
118
|
node.children.each { |n| process(n) }
|
106
|
-
@
|
119
|
+
@document.break(Deba::Paragraph)
|
107
120
|
|
108
121
|
return
|
109
122
|
end
|
@@ -112,11 +125,7 @@ class Deba::Extractor
|
|
112
125
|
node.children.each { |n| process(n) }
|
113
126
|
end
|
114
127
|
|
115
|
-
def
|
116
|
-
|
117
|
-
Deba::Blockquote.new
|
118
|
-
else
|
119
|
-
nil
|
120
|
-
end
|
128
|
+
def in_blockquote?
|
129
|
+
@in_blockquote
|
121
130
|
end
|
122
131
|
end
|
data/lib/deba/heading.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
class Deba::Heading
|
2
|
-
attr_reader :segments, :level
|
3
|
-
|
4
2
|
def initialize(segments, level)
|
5
3
|
@segments = segments
|
6
4
|
@level = level
|
7
5
|
end
|
8
6
|
|
9
|
-
def
|
10
|
-
"#
|
7
|
+
def to_a
|
8
|
+
["#" * @level] + @segments + ["\n\n"]
|
11
9
|
end
|
12
10
|
end
|
data/lib/deba/list_item.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
1
|
class Deba::ListItem
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix, last, index)
|
2
|
+
def initialize(segments, last, index)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
@last = last
|
8
5
|
@index = index
|
9
6
|
end
|
10
7
|
|
11
|
-
def
|
12
|
-
|
8
|
+
def to_a
|
9
|
+
[prefix] + @segments + ["\n#{"\n" if @last}"]
|
13
10
|
end
|
14
11
|
|
15
12
|
def prefix
|
data/lib/deba/paragraph.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
class Deba::Paragraph
|
2
|
-
|
3
|
-
|
4
|
-
def initialize(segments, line_prefix)
|
2
|
+
def initialize(segments)
|
5
3
|
@segments = segments
|
6
|
-
@line_prefix = line_prefix
|
7
4
|
end
|
8
5
|
|
9
|
-
def
|
10
|
-
|
6
|
+
def always?
|
7
|
+
false
|
8
|
+
end
|
9
|
+
|
10
|
+
def to_a
|
11
|
+
@segments + ["\n\n"]
|
11
12
|
end
|
12
13
|
end
|
data/lib/deba/span.rb
ADDED
data/lib/deba/stringifier.rb
CHANGED
@@ -1,28 +1,17 @@
|
|
1
1
|
class Deba::Stringifier
|
2
|
-
def initialize(segments, line_prefix)
|
2
|
+
def initialize(segments)
|
3
3
|
@segments = segments
|
4
|
-
@line_prefix = line_prefix
|
5
4
|
end
|
6
5
|
|
7
6
|
def stringify
|
8
|
-
prefix(chunkify)
|
9
|
-
end
|
10
|
-
|
11
|
-
def chunkify
|
12
7
|
chunks = @segments.chunk { |segment| segment.class }
|
13
8
|
|
14
9
|
chunks.map do |type, chunk_segments|
|
15
|
-
if type ==
|
16
|
-
Deba::Utils.normalise(chunk_segments.join)
|
17
|
-
|
18
|
-
chunk_segments.
|
10
|
+
if type == Deba::Span
|
11
|
+
Deba::Utils.normalise(chunk_segments.map { |s| s.to_s }.join)
|
12
|
+
else
|
13
|
+
chunk_segments.join
|
19
14
|
end
|
20
15
|
end.join
|
21
16
|
end
|
22
|
-
|
23
|
-
def prefix(text)
|
24
|
-
return text if @line_prefix.nil?
|
25
|
-
|
26
|
-
text.gsub(/^/, @line_prefix.to_s)
|
27
|
-
end
|
28
17
|
end
|
data/lib/deba.rb
CHANGED
@@ -1,28 +1,22 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.10.0"
|
4
|
+
VERSION = "0.11.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
8
8
|
require "deba/stringifier"
|
9
|
-
require "deba/
|
10
|
-
require "deba/break"
|
9
|
+
require "deba/span"
|
11
10
|
require "deba/heading"
|
12
11
|
require "deba/list_item"
|
13
|
-
require "deba/blockquote"
|
14
12
|
require "deba/definition_term"
|
15
13
|
require "deba/definition_description"
|
16
14
|
require "deba/paragraph"
|
17
|
-
require "deba/
|
15
|
+
require "deba/document"
|
18
16
|
require "deba/extractor"
|
19
17
|
|
20
18
|
module Deba
|
21
19
|
def self.extract(html, options = {})
|
22
|
-
document(html, options).to_s
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.document(html, options = {})
|
26
20
|
doc = html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri.HTML(html)
|
27
21
|
Deba::Extractor.new(doc, options).extract
|
28
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.0
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brenton "B-Train" Fletcher
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,8 +71,6 @@ files:
|
|
71
71
|
- deba.gemspec
|
72
72
|
- exe/deba
|
73
73
|
- lib/deba.rb
|
74
|
-
- lib/deba/blockquote.rb
|
75
|
-
- lib/deba/break.rb
|
76
74
|
- lib/deba/definition_description.rb
|
77
75
|
- lib/deba/definition_term.rb
|
78
76
|
- lib/deba/document.rb
|
@@ -80,8 +78,8 @@ files:
|
|
80
78
|
- lib/deba/heading.rb
|
81
79
|
- lib/deba/list_item.rb
|
82
80
|
- lib/deba/paragraph.rb
|
81
|
+
- lib/deba/span.rb
|
83
82
|
- lib/deba/stringifier.rb
|
84
|
-
- lib/deba/text_runner.rb
|
85
83
|
- lib/deba/utils.rb
|
86
84
|
homepage: http://example.com
|
87
85
|
licenses:
|
data/lib/deba/blockquote.rb
DELETED
data/lib/deba/break.rb
DELETED
data/lib/deba/text_runner.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
class Deba::TextRunner
|
2
|
-
def initialize(document)
|
3
|
-
@document = document
|
4
|
-
|
5
|
-
start
|
6
|
-
end
|
7
|
-
|
8
|
-
def <<(segment)
|
9
|
-
@segments << segment
|
10
|
-
end
|
11
|
-
|
12
|
-
def break(*args)
|
13
|
-
finish
|
14
|
-
start(*args)
|
15
|
-
end
|
16
|
-
|
17
|
-
def finish
|
18
|
-
return unless present?
|
19
|
-
|
20
|
-
@args.unshift(@segments)
|
21
|
-
@document << @block_type.new(*@args)
|
22
|
-
end
|
23
|
-
|
24
|
-
def start(*args)
|
25
|
-
@segments = []
|
26
|
-
@block_type = args.shift
|
27
|
-
@args = args
|
28
|
-
end
|
29
|
-
|
30
|
-
def present?
|
31
|
-
@segments.any? { |segment| segment.is_a?(String) && Deba::Utils.present?(segment) }
|
32
|
-
end
|
33
|
-
end
|