deba 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/deba +6 -0
- data/lib/deba.rb +2 -1
- data/lib/deba/document.rb +1 -1
- data/lib/deba/extractor.rb +11 -1
- data/lib/deba/list_item.rb +19 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c8a544deca7b66f05de8209b957f2ce5c07f1e2
|
4
|
+
data.tar.gz: 3ec0e8bb25b23ad2145867dc130ea9a9f475f64b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cdd042c6214229de58365925f74b435e14cc9f54f9ca40304189354706c7182a805ccf971bf5a8e59820164faafa6d7549f1d26f7b6fefcf77fb5f19f0c18018
|
7
|
+
data.tar.gz: 3bdfc3fcd212b0dacd8228c04c391324a67ec619c853b74255c063036a4748b4249abcb2e0052837c09428a42cd5fcbd7adb192278db760c1dd3ff491d79ef94
|
data/exe/deba
ADDED
data/lib/deba.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.6.0"
|
4
|
+
VERSION = "0.7.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
@@ -9,6 +9,7 @@ require "deba/stringifier"
|
|
9
9
|
require "deba/document"
|
10
10
|
require "deba/break"
|
11
11
|
require "deba/heading"
|
12
|
+
require "deba/list_item"
|
12
13
|
require "deba/paragraph"
|
13
14
|
require "deba/text_runner"
|
14
15
|
require "deba/extractor"
|
data/lib/deba/document.rb
CHANGED
data/lib/deba/extractor.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class Deba::Extractor
|
2
2
|
HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6)
|
3
3
|
BLOCK_INITIATING_TAGS = %w(article aside body blockquote dd dt header li nav ol p pre section td th ul)
|
4
|
-
ENHANCERS = { %w(b strong) => "*", %(i em) => "_" }
|
4
|
+
ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }
|
5
5
|
|
6
6
|
attr_reader :blocks
|
7
7
|
|
@@ -59,6 +59,16 @@ class Deba::Extractor
|
|
59
59
|
return
|
60
60
|
end
|
61
61
|
|
62
|
+
if node_name == 'li'
|
63
|
+
last_item = node.xpath('count(following-sibling::li)').to_i == 0
|
64
|
+
index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
|
65
|
+
@text_run.break(Deba::ListItem, last_item, index)
|
66
|
+
node.children.each { |n| process(n) }
|
67
|
+
@text_run.break(Deba::Paragraph)
|
68
|
+
|
69
|
+
return
|
70
|
+
end
|
71
|
+
|
62
72
|
#These tags terminate the current paragraph, if present, and start a new paragraph
|
63
73
|
if BLOCK_INITIATING_TAGS.include?(node_name)
|
64
74
|
@text_run.break(Deba::Paragraph)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class Deba::ListItem
|
2
|
+
attr_reader :segments
|
3
|
+
|
4
|
+
def initialize(segments, last, index)
|
5
|
+
@segments = segments
|
6
|
+
@last = last
|
7
|
+
@index = index
|
8
|
+
end
|
9
|
+
|
10
|
+
def to_s
|
11
|
+
prefix = if @index.nil?
|
12
|
+
"* "
|
13
|
+
else
|
14
|
+
"#{@index}. "
|
15
|
+
end
|
16
|
+
|
17
|
+
"#{prefix}#{Deba::Stringifier.new(@segments).stringify}\n#{"\n" if @last}"
|
18
|
+
end
|
19
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brenton "B-Train" Fletcher
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01
|
11
|
+
date: 2017-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -56,7 +56,8 @@ description: Deba takes a HTML document or fragment and extracts the text conten
|
|
56
56
|
into a plaintext format that is a strict subset of markdown.
|
57
57
|
email:
|
58
58
|
- i@bloople.net
|
59
|
-
executables:
|
59
|
+
executables:
|
60
|
+
- deba
|
60
61
|
extensions: []
|
61
62
|
extra_rdoc_files: []
|
62
63
|
files:
|
@@ -68,11 +69,13 @@ files:
|
|
68
69
|
- bin/console
|
69
70
|
- bin/setup
|
70
71
|
- deba.gemspec
|
72
|
+
- exe/deba
|
71
73
|
- lib/deba.rb
|
72
74
|
- lib/deba/break.rb
|
73
75
|
- lib/deba/document.rb
|
74
76
|
- lib/deba/extractor.rb
|
75
77
|
- lib/deba/heading.rb
|
78
|
+
- lib/deba/list_item.rb
|
76
79
|
- lib/deba/paragraph.rb
|
77
80
|
- lib/deba/stringifier.rb
|
78
81
|
- lib/deba/text_runner.rb
|