deba 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d717c720283ce4e2ec8ff902b269c8fc79e15d92
4
- data.tar.gz: e589c45ed86c7b6929ffdee81f8228a4197f963a
3
+ metadata.gz: 64da76c5a1c04969f66f580d20e350c7bca13715
4
+ data.tar.gz: fa665cf07572193d1a3f6ffcfa976885bf50a9ad
5
5
  SHA512:
6
- metadata.gz: ff5850362d73ad3fcb1e3ed1708ed2fb5ae35ab6d364f6f9efec83d41a02bff57fc3f52968c253941d07d16ff9e16364247a1d226b6925a2f9ae45d14c226a3c
7
- data.tar.gz: e21024b05d064f279ee581f6841bc8e0d63dc4e7733b44151a2e9a0ef9d3aebf68a8103860503ed3c174f92af30c9851bf89f29c4e030ab936495f78bde4d71c
6
+ metadata.gz: ef43a8c834a219a8b4fa5ad78cc4a59f10b09ec637ca46574a2ef61d22cc879ac9da32fea5fbf904d396b6b95c9c64301c730a818914149d9e9db840776dc215
7
+ data.tar.gz: 0f06e42c7267719f135a4bbcd0e6d24565979b19c3ade21bf49e622fccb7bec3c7cfe332651dde6fcbb2158064e08f8a560b047a1f9c029a49398711d9678cb1
@@ -1,13 +1,10 @@
1
1
  class Deba::DefinitionDescription
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix, last)
2
+ def initialize(segments, last)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  @last = last
8
5
  end
9
6
 
10
- def to_s
11
- "#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n#{"\n" if @last}"
7
+ def to_a
8
+ @segments + ["\n#{"\n" if @last}"]
12
9
  end
13
10
  end
@@ -1,12 +1,9 @@
1
1
  class Deba::DefinitionTerm
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix)
2
+ def initialize(segments)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  end
8
5
 
9
- def to_s
10
- "#{Deba::Stringifier.new(@segments, @line_prefix).stringify}:\n"
6
+ def to_a
7
+ @segments + [":\n"]
11
8
  end
12
9
  end
data/lib/deba/document.rb CHANGED
@@ -1,15 +1,39 @@
1
1
  class Deba::Document
2
- attr_reader :blocks
2
+ attr_reader :content
3
3
 
4
- def initialize
5
- @blocks = []
4
+ def initialize(extractor)
5
+ @extractor = extractor
6
+ @content = ""
7
+
8
+ start
9
+ end
10
+
11
+ def <<(segment)
12
+ @segments << segment
13
+ end
14
+
15
+ def break(*args)
16
+ finish
17
+ start(*args)
18
+ end
19
+
20
+ def finish
21
+ return unless present?
22
+
23
+ @args.unshift(@segments)
24
+ block = @block_type.new(*@args).to_a
25
+ block.unshift("> ") if @extractor.in_blockquote?
26
+
27
+ @content << Deba::Stringifier.new(block).stringify
6
28
  end
7
29
 
8
- def <<(block)
9
- @blocks << block
30
+ def start(*args)
31
+ @segments = []
32
+ @block_type = args.shift
33
+ @args = args
10
34
  end
11
35
 
12
- def to_s
13
- @blocks.map { |block| block.to_s }.join.chomp("\n")
36
+ def present?
37
+ @segments.any? { |segment| segment.is_a?(Deba::Span) && Deba::Utils.present?(segment.to_s) }
14
38
  end
15
39
  end
@@ -12,12 +12,12 @@ class Deba::Extractor
12
12
 
13
13
  def extract
14
14
  @just_appended_br = false
15
- @document = Deba::Document.new
16
- @text_run = Deba::TextRunner.new(@document)
15
+ @in_blockquote = false
16
+ @document = Deba::Document.new(self)
17
17
 
18
18
  process(@node)
19
19
 
20
- @document
20
+ @document.content.chomp("\n")
21
21
  end
22
22
 
23
23
  def process(node)
@@ -34,7 +34,7 @@ class Deba::Extractor
34
34
  if @just_appended_br
35
35
  @just_appended_br = false
36
36
 
37
- @text_run.break(Deba::Paragraph, line_prefix(node))
37
+ @document.break(Deba::Paragraph)
38
38
 
39
39
  return
40
40
  else
@@ -43,11 +43,11 @@ class Deba::Extractor
43
43
  elsif @just_appended_br
44
44
  @just_appended_br = false
45
45
 
46
- @text_run << Deba::Break.new
46
+ @document << "\n"
47
47
  end
48
48
 
49
49
  if node.text?
50
- @text_run << node.inner_text if Deba::Utils.present?(node.inner_text)
50
+ @document << Deba::Span.new(node.inner_text) if Deba::Utils.present?(node.inner_text)
51
51
 
52
52
  return
53
53
  end
@@ -55,55 +55,68 @@ class Deba::Extractor
55
55
  if ENHANCERS.keys.flatten.include?(node_name)
56
56
  ENHANCERS.each_pair do |tags, nsf_rep|
57
57
  if tags.include?(node_name)
58
- @text_run << nsf_rep
58
+ @document << nsf_rep
59
59
  node.children.each { |n| process(n) }
60
- @text_run << nsf_rep
60
+ @document << nsf_rep
61
61
  end
62
62
  end
63
63
 
64
64
  return
65
65
  end
66
66
 
67
+ if node_name == 'blockquote'
68
+ @in_blockquote = true
69
+
70
+ @document.break(Deba::Paragraph)
71
+ node.children.each { |n| process(n) }
72
+ @document.break(Deba::Paragraph)
73
+
74
+ @in_blockquote = false
75
+
76
+ return
77
+ end
78
+
67
79
  if node_name == 'li'
68
80
  last_item = node.xpath('count(following-sibling::li)').to_i == 0
69
81
  index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
70
- @text_run.break(Deba::ListItem, line_prefix(node), last_item, index)
82
+
83
+ @document.break(Deba::ListItem, last_item, index)
71
84
  node.children.each { |n| process(n) }
72
- @text_run.break(Deba::Paragraph, line_prefix(node))
85
+ @document.break(Deba::Paragraph)
73
86
 
74
87
  return
75
88
  end
76
89
 
77
90
  if node_name == 'dt'
78
- @text_run.break(Deba::DefinitionTerm, line_prefix(node))
91
+ @document.break(Deba::DefinitionTerm)
79
92
  node.children.each { |n| process(n) }
80
- @text_run.break(Deba::Paragraph, line_prefix(node))
93
+ @document.break(Deba::Paragraph)
81
94
 
82
95
  return
83
96
  end
84
97
 
85
98
  if node_name == 'dd'
86
99
  last_item = node.xpath('count(following-sibling::dd)').to_i == 0
87
- @text_run.break(Deba::DefinitionDescription, line_prefix(node), last_item)
100
+ @document.break(Deba::DefinitionDescription, last_item)
88
101
  node.children.each { |n| process(n) }
89
- @text_run.break(Deba::Paragraph, line_prefix(node))
102
+ @document.break(Deba::Paragraph)
90
103
 
91
104
  return
92
105
  end
93
106
 
94
107
  #These tags terminate the current paragraph, if present, and start a new paragraph
95
108
  if BLOCK_INITIATING_TAGS.include?(node_name)
96
- @text_run.break(Deba::Paragraph, line_prefix(node))
109
+ @document.break(Deba::Paragraph)
97
110
  node.children.each { |n| process(n) }
98
- @text_run.break(Deba::Paragraph, line_prefix(node))
111
+ @document.break(Deba::Paragraph)
99
112
 
100
113
  return
101
114
  end
102
115
 
103
116
  if HEADING_TAGS.include?(node_name)
104
- @text_run.break(Deba::Heading, node_name[1..-1].to_i)
117
+ @document.break(Deba::Heading, node_name[1..-1].to_i)
105
118
  node.children.each { |n| process(n) }
106
- @text_run.break(Deba::Paragraph, line_prefix(node))
119
+ @document.break(Deba::Paragraph)
107
120
 
108
121
  return
109
122
  end
@@ -112,11 +125,7 @@ class Deba::Extractor
112
125
  node.children.each { |n| process(n) }
113
126
  end
114
127
 
115
- def line_prefix(node)
116
- if node.xpath('boolean(ancestor::blockquote)')
117
- Deba::Blockquote.new
118
- else
119
- nil
120
- end
128
+ def in_blockquote?
129
+ @in_blockquote
121
130
  end
122
131
  end
data/lib/deba/heading.rb CHANGED
@@ -1,12 +1,10 @@
1
1
  class Deba::Heading
2
- attr_reader :segments, :level
3
-
4
2
  def initialize(segments, level)
5
3
  @segments = segments
6
4
  @level = level
7
5
  end
8
6
 
9
- def to_s
10
- "#{"#" * @level} #{Deba::Stringifier.new(@segments).stringify}\n\n"
7
+ def to_a
8
+ ["#" * @level] + @segments + ["\n\n"]
11
9
  end
12
10
  end
@@ -1,15 +1,12 @@
1
1
  class Deba::ListItem
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix, last, index)
2
+ def initialize(segments, last, index)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  @last = last
8
5
  @index = index
9
6
  end
10
7
 
11
- def to_s
12
- "#{Deba::Stringifier.new([prefix] + @segments, @line_prefix).stringify}\n#{"\n" if @last}"
8
+ def to_a
9
+ [prefix] + @segments + ["\n#{"\n" if @last}"]
13
10
  end
14
11
 
15
12
  def prefix
@@ -1,12 +1,13 @@
1
1
  class Deba::Paragraph
2
- attr_reader :segments
3
-
4
- def initialize(segments, line_prefix)
2
+ def initialize(segments)
5
3
  @segments = segments
6
- @line_prefix = line_prefix
7
4
  end
8
5
 
9
- def to_s
10
- "#{Deba::Stringifier.new(@segments, @line_prefix).stringify}\n\n"
6
+ def always?
7
+ false
8
+ end
9
+
10
+ def to_a
11
+ @segments + ["\n\n"]
11
12
  end
12
13
  end
data/lib/deba/span.rb ADDED
@@ -0,0 +1,9 @@
1
+ class Deba::Span
2
+ def initialize(text)
3
+ @text = text
4
+ end
5
+
6
+ def to_s
7
+ @text
8
+ end
9
+ end
@@ -1,28 +1,17 @@
1
1
  class Deba::Stringifier
2
- def initialize(segments, line_prefix = nil)
2
+ def initialize(segments)
3
3
  @segments = segments
4
- @line_prefix = line_prefix
5
4
  end
6
5
 
7
6
  def stringify
8
- prefix(chunkify)
9
- end
10
-
11
- def chunkify
12
7
  chunks = @segments.chunk { |segment| segment.class }
13
8
 
14
9
  chunks.map do |type, chunk_segments|
15
- if type == String
16
- Deba::Utils.normalise(chunk_segments.join)
17
- elsif type == Deba::Break
18
- chunk_segments.map { |s| s.to_s }.join
10
+ if type == Deba::Span
11
+ Deba::Utils.normalise(chunk_segments.map { |s| s.to_s }.join)
12
+ else
13
+ chunk_segments.join
19
14
  end
20
15
  end.join
21
16
  end
22
-
23
- def prefix(text)
24
- return text if @line_prefix.nil?
25
-
26
- text.gsub(/^/, @line_prefix.to_s)
27
- end
28
17
  end
data/lib/deba.rb CHANGED
@@ -1,28 +1,22 @@
1
1
  require "nokogiri"
2
2
 
3
3
  module Deba
4
- VERSION = "0.10.0"
4
+ VERSION = "0.11.0"
5
5
  end
6
6
 
7
7
  require "deba/utils"
8
8
  require "deba/stringifier"
9
- require "deba/document"
10
- require "deba/break"
9
+ require "deba/span"
11
10
  require "deba/heading"
12
11
  require "deba/list_item"
13
- require "deba/blockquote"
14
12
  require "deba/definition_term"
15
13
  require "deba/definition_description"
16
14
  require "deba/paragraph"
17
- require "deba/text_runner"
15
+ require "deba/document"
18
16
  require "deba/extractor"
19
17
 
20
18
  module Deba
21
19
  def self.extract(html, options = {})
22
- document(html, options).to_s
23
- end
24
-
25
- def self.document(html, options = {})
26
20
  doc = html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri.HTML(html)
27
21
  Deba::Extractor.new(doc, options).extract
28
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brenton "B-Train" Fletcher
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-02 00:00:00.000000000 Z
11
+ date: 2017-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -71,8 +71,6 @@ files:
71
71
  - deba.gemspec
72
72
  - exe/deba
73
73
  - lib/deba.rb
74
- - lib/deba/blockquote.rb
75
- - lib/deba/break.rb
76
74
  - lib/deba/definition_description.rb
77
75
  - lib/deba/definition_term.rb
78
76
  - lib/deba/document.rb
@@ -80,8 +78,8 @@ files:
80
78
  - lib/deba/heading.rb
81
79
  - lib/deba/list_item.rb
82
80
  - lib/deba/paragraph.rb
81
+ - lib/deba/span.rb
83
82
  - lib/deba/stringifier.rb
84
- - lib/deba/text_runner.rb
85
83
  - lib/deba/utils.rb
86
84
  homepage: http://example.com
87
85
  licenses:
@@ -1,5 +0,0 @@
1
- class Deba::Blockquote
2
- def to_s
3
- "> "
4
- end
5
- end
data/lib/deba/break.rb DELETED
@@ -1,5 +0,0 @@
1
- class Deba::Break
2
- def to_s
3
- "\n"
4
- end
5
- end
@@ -1,33 +0,0 @@
1
- class Deba::TextRunner
2
- def initialize(document)
3
- @document = document
4
-
5
- start
6
- end
7
-
8
- def <<(segment)
9
- @segments << segment
10
- end
11
-
12
- def break(*args)
13
- finish
14
- start(*args)
15
- end
16
-
17
- def finish
18
- return unless present?
19
-
20
- @args.unshift(@segments)
21
- @document << @block_type.new(*@args)
22
- end
23
-
24
- def start(*args)
25
- @segments = []
26
- @block_type = args.shift
27
- @args = args
28
- end
29
-
30
- def present?
31
- @segments.any? { |segment| segment.is_a?(String) && Deba::Utils.present?(segment) }
32
- end
33
- end