deba 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d717c720283ce4e2ec8ff902b269c8fc79e15d92
4
- data.tar.gz: e589c45ed86c7b6929ffdee81f8228a4197f963a
3
+ metadata.gz: 64da76c5a1c04969f66f580d20e350c7bca13715
4
+ data.tar.gz: fa665cf07572193d1a3f6ffcfa976885bf50a9ad
5
5
  SHA512:
6
- metadata.gz: ff5850362d73ad3fcb1e3ed1708ed2fb5ae35ab6d364f6f9efec83d41a02bff57fc3f52968c253941d07d16ff9e16364247a1d226b6925a2f9ae45d14c226a3c
7
- data.tar.gz: e21024b05d064f279ee581f6841bc8e0d63dc4e7733b44151a2e9a0ef9d3aebf68a8103860503ed3c174f92af30c9851bf89f29c4e030ab936495f78bde4d71c
6
+ metadata.gz: ef43a8c834a219a8b4fa5ad78cc4a59f10b09ec637ca46574a2ef61d22cc879ac9da32fea5fbf904d396b6b95c9c64301c730a818914149d9e9db840776dc215
7
+ data.tar.gz: 0f06e42c7267719f135a4bbcd0e6d24565979b19c3ade21bf49e622fccb7bec3c7cfe332651dde6fcbb2158064e08f8a560b047a1f9c029a49398711d9678cb1
@@ -1,13 +1,10 @@
1
1
  class Deba::DefinitionDescription
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix, last)
2
+ def initialize(segments, last)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  @last = last
8
5
  end
9
6
 
10
- def to_s
11
- "#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n#{"\n" if @last}"
7
+ def to_a
8
+ @segments + ["\n#{"\n" if @last}"]
12
9
  end
13
10
  end
@@ -1,12 +1,9 @@
1
1
  class Deba::DefinitionTerm
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix)
2
+ def initialize(segments)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  end
8
5
 
9
- def to_s
10
- "#{Deba::Stringifier.new(@segments, @line_prefix).stringify}:\n"
6
+ def to_a
7
+ @segments + [":\n"]
11
8
  end
12
9
  end
data/lib/deba/document.rb CHANGED
@@ -1,15 +1,39 @@
1
1
  class Deba::Document
2
- attr_reader :blocks
2
+ attr_reader :content
3
3
 
4
- def initialize
5
- @blocks = []
4
+ def initialize(extractor)
5
+ @extractor = extractor
6
+ @content = ""
7
+
8
+ start
9
+ end
10
+
11
+ def <<(segment)
12
+ @segments << segment
13
+ end
14
+
15
+ def break(*args)
16
+ finish
17
+ start(*args)
18
+ end
19
+
20
+ def finish
21
+ return unless present?
22
+
23
+ @args.unshift(@segments)
24
+ block = @block_type.new(*@args).to_a
25
+ block.unshift("> ") if @extractor.in_blockquote?
26
+
27
+ @content << Deba::Stringifier.new(block).stringify
6
28
  end
7
29
 
8
- def <<(block)
9
- @blocks << block
30
+ def start(*args)
31
+ @segments = []
32
+ @block_type = args.shift
33
+ @args = args
10
34
  end
11
35
 
12
- def to_s
13
- @blocks.map { |block| block.to_s }.join.chomp("\n")
36
+ def present?
37
+ @segments.any? { |segment| segment.is_a?(Deba::Span) && Deba::Utils.present?(segment.to_s) }
14
38
  end
15
39
  end
@@ -12,12 +12,12 @@ class Deba::Extractor
12
12
 
13
13
  def extract
14
14
  @just_appended_br = false
15
- @document = Deba::Document.new
16
- @text_run = Deba::TextRunner.new(@document)
15
+ @in_blockquote = false
16
+ @document = Deba::Document.new(self)
17
17
 
18
18
  process(@node)
19
19
 
20
- @document
20
+ @document.content.chomp("\n")
21
21
  end
22
22
 
23
23
  def process(node)
@@ -34,7 +34,7 @@ class Deba::Extractor
34
34
  if @just_appended_br
35
35
  @just_appended_br = false
36
36
 
37
- @text_run.break(Deba::Paragraph, line_prefix(node))
37
+ @document.break(Deba::Paragraph)
38
38
 
39
39
  return
40
40
  else
@@ -43,11 +43,11 @@ class Deba::Extractor
43
43
  elsif @just_appended_br
44
44
  @just_appended_br = false
45
45
 
46
- @text_run << Deba::Break.new
46
+ @document << "\n"
47
47
  end
48
48
 
49
49
  if node.text?
50
- @text_run << node.inner_text if Deba::Utils.present?(node.inner_text)
50
+ @document << Deba::Span.new(node.inner_text) if Deba::Utils.present?(node.inner_text)
51
51
 
52
52
  return
53
53
  end
@@ -55,55 +55,68 @@ class Deba::Extractor
55
55
  if ENHANCERS.keys.flatten.include?(node_name)
56
56
  ENHANCERS.each_pair do |tags, nsf_rep|
57
57
  if tags.include?(node_name)
58
- @text_run << nsf_rep
58
+ @document << nsf_rep
59
59
  node.children.each { |n| process(n) }
60
- @text_run << nsf_rep
60
+ @document << nsf_rep
61
61
  end
62
62
  end
63
63
 
64
64
  return
65
65
  end
66
66
 
67
+ if node_name == 'blockquote'
68
+ @in_blockquote = true
69
+
70
+ @document.break(Deba::Paragraph)
71
+ node.children.each { |n| process(n) }
72
+ @document.break(Deba::Paragraph)
73
+
74
+ @in_blockquote = false
75
+
76
+ return
77
+ end
78
+
67
79
  if node_name == 'li'
68
80
  last_item = node.xpath('count(following-sibling::li)').to_i == 0
69
81
  index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
70
- @text_run.break(Deba::ListItem, line_prefix(node), last_item, index)
82
+
83
+ @document.break(Deba::ListItem, last_item, index)
71
84
  node.children.each { |n| process(n) }
72
- @text_run.break(Deba::Paragraph, line_prefix(node))
85
+ @document.break(Deba::Paragraph)
73
86
 
74
87
  return
75
88
  end
76
89
 
77
90
  if node_name == 'dt'
78
- @text_run.break(Deba::DefinitionTerm, line_prefix(node))
91
+ @document.break(Deba::DefinitionTerm)
79
92
  node.children.each { |n| process(n) }
80
- @text_run.break(Deba::Paragraph, line_prefix(node))
93
+ @document.break(Deba::Paragraph)
81
94
 
82
95
  return
83
96
  end
84
97
 
85
98
  if node_name == 'dd'
86
99
  last_item = node.xpath('count(following-sibling::dd)').to_i == 0
87
- @text_run.break(Deba::DefinitionDescription, line_prefix(node), last_item)
100
+ @document.break(Deba::DefinitionDescription, last_item)
88
101
  node.children.each { |n| process(n) }
89
- @text_run.break(Deba::Paragraph, line_prefix(node))
102
+ @document.break(Deba::Paragraph)
90
103
 
91
104
  return
92
105
  end
93
106
 
94
107
  #These tags terminate the current paragraph, if present, and start a new paragraph
95
108
  if BLOCK_INITIATING_TAGS.include?(node_name)
96
- @text_run.break(Deba::Paragraph, line_prefix(node))
109
+ @document.break(Deba::Paragraph)
97
110
  node.children.each { |n| process(n) }
98
- @text_run.break(Deba::Paragraph, line_prefix(node))
111
+ @document.break(Deba::Paragraph)
99
112
 
100
113
  return
101
114
  end
102
115
 
103
116
  if HEADING_TAGS.include?(node_name)
104
- @text_run.break(Deba::Heading, node_name[1..-1].to_i)
117
+ @document.break(Deba::Heading, node_name[1..-1].to_i)
105
118
  node.children.each { |n| process(n) }
106
- @text_run.break(Deba::Paragraph, line_prefix(node))
119
+ @document.break(Deba::Paragraph)
107
120
 
108
121
  return
109
122
  end
@@ -112,11 +125,7 @@ class Deba::Extractor
112
125
  node.children.each { |n| process(n) }
113
126
  end
114
127
 
115
- def line_prefix(node)
116
- if node.xpath('boolean(ancestor::blockquote)')
117
- Deba::Blockquote.new
118
- else
119
- nil
120
- end
128
+ def in_blockquote?
129
+ @in_blockquote
121
130
  end
122
131
  end
data/lib/deba/heading.rb CHANGED
@@ -1,12 +1,10 @@
1
1
  class Deba::Heading
2
- attr_reader :segments, :level
3
-
4
2
  def initialize(segments, level)
5
3
  @segments = segments
6
4
  @level = level
7
5
  end
8
6
 
9
- def to_s
10
- "#{"#" * @level} #{Deba::Stringifier.new(@segments).stringify}\n\n"
7
+ def to_a
8
+ ["#" * @level] + @segments + ["\n\n"]
11
9
  end
12
10
  end
@@ -1,15 +1,12 @@
1
1
  class Deba::ListItem
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix, last, index)
2
+ def initialize(segments, last, index)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  @last = last
8
5
  @index = index
9
6
  end
10
7
 
11
- def to_s
12
- "#{Deba::Stringifier.new([prefix] + @segments, @line_prefix).stringify}\n#{"\n" if @last}"
8
+ def to_a
9
+ [prefix] + @segments + ["\n#{"\n" if @last}"]
13
10
  end
14
11
 
15
12
  def prefix
@@ -1,12 +1,13 @@
1
1
  class Deba::Paragraph
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix)
2
+ def initialize(segments)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  end
8
5
 
9
- def to_s
10
- "#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n\n"
6
+ def always?
7
+ false
8
+ end
9
+
10
+ def to_a
11
+ @segments + ["\n\n"]
11
12
  end
12
13
  end
data/lib/deba/span.rb ADDED
@@ -0,0 +1,9 @@
1
+ class Deba::Span
2
+ def initialize(text)
3
+ @text = text
4
+ end
5
+
6
+ def to_s
7
+ @text
8
+ end
9
+ end
@@ -1,28 +1,17 @@
1
1
  class Deba::Stringifier
2
- def initialize(segments, line_prefix = nil)
2
+ def initialize(segments)
3
3
  @segments = segments
4
- @line_prefix = line_prefix
5
4
  end
6
5
 
7
6
  def stringify
8
- prefix(chunkify)
9
- end
10
-
11
- def chunkify
12
7
  chunks = @segments.chunk { |segment| segment.class }
13
8
 
14
9
  chunks.map do |type, chunk_segments|
15
- if type == String
16
- Deba::Utils.normalise(chunk_segments.join)
17
- elsif type == Deba::Break
18
- chunk_segments.map { |s| s.to_s }.join
10
+ if type == Deba::Span
11
+ Deba::Utils.normalise(chunk_segments.map { |s| s.to_s }.join)
12
+ else
13
+ chunk_segments.join
19
14
  end
20
15
  end.join
21
16
  end
22
-
23
- def prefix(text)
24
- return text if @line_prefix.nil?
25
-
26
- text.gsub(/^/, @line_prefix.to_s)
27
- end
28
17
  end
data/lib/deba.rb CHANGED
@@ -1,28 +1,22 @@
1
1
  require "nokogiri"
2
2
 
3
3
  module Deba
4
- VERSION = "0.10.0"
4
+ VERSION = "0.11.0"
5
5
  end
6
6
 
7
7
  require "deba/utils"
8
8
  require "deba/stringifier"
9
- require "deba/document"
10
- require "deba/break"
9
+ require "deba/span"
11
10
  require "deba/heading"
12
11
  require "deba/list_item"
13
- require "deba/blockquote"
14
12
  require "deba/definition_term"
15
13
  require "deba/definition_description"
16
14
  require "deba/paragraph"
17
- require "deba/text_runner"
15
+ require "deba/document"
18
16
  require "deba/extractor"
19
17
 
20
18
  module Deba
21
19
  def self.extract(html, options = {})
22
- document(html, options).to_s
23
- end
24
-
25
- def self.document(html, options = {})
26
20
  doc = html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri.HTML(html)
27
21
  Deba::Extractor.new(doc, options).extract
28
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brenton "B-Train" Fletcher
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-02 00:00:00.000000000 Z
11
+ date: 2017-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -71,8 +71,6 @@ files:
71
71
  - deba.gemspec
72
72
  - exe/deba
73
73
  - lib/deba.rb
74
- - lib/deba/blockquote.rb
75
- - lib/deba/break.rb
76
74
  - lib/deba/definition_description.rb
77
75
  - lib/deba/definition_term.rb
78
76
  - lib/deba/document.rb
@@ -80,8 +78,8 @@ files:
80
78
  - lib/deba/heading.rb
81
79
  - lib/deba/list_item.rb
82
80
  - lib/deba/paragraph.rb
81
+ - lib/deba/span.rb
83
82
  - lib/deba/stringifier.rb
84
- - lib/deba/text_runner.rb
85
83
  - lib/deba/utils.rb
86
84
  homepage: http://example.com
87
85
  licenses:
@@ -1,5 +0,0 @@
1
- class Deba::Blockquote
2
- def to_s
3
- "> "
4
- end
5
- end
data/lib/deba/break.rb DELETED
@@ -1,5 +0,0 @@
1
- class Deba::Break
2
- def to_s
3
- "\n"
4
- end
5
- end
@@ -1,33 +0,0 @@
1
- class Deba::TextRunner
2
- def initialize(document)
3
- @document = document
4
-
5
- start
6
- end
7
-
8
- def <<(segment)
9
- @segments << segment
10
- end
11
-
12
- def break(*args)
13
- finish
14
- start(*args)
15
- end
16
-
17
- def finish
18
- return unless present?
19
-
20
- @args.unshift(@segments)
21
- @document << @block_type.new(*@args)
22
- end
23
-
24
- def start(*args)
25
- @segments = []
26
- @block_type = args.shift
27
- @args = args
28
- end
29
-
30
- def present?
31
- @segments.any? { |segment| segment.is_a?(String) && Deba::Utils.present?(segment) }
32
- end
33
- end