deba 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/deba +6 -0
- data/lib/deba.rb +2 -1
- data/lib/deba/document.rb +1 -1
- data/lib/deba/extractor.rb +11 -1
- data/lib/deba/list_item.rb +19 -0
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c8a544deca7b66f05de8209b957f2ce5c07f1e2
|
4
|
+
data.tar.gz: 3ec0e8bb25b23ad2145867dc130ea9a9f475f64b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cdd042c6214229de58365925f74b435e14cc9f54f9ca40304189354706c7182a805ccf971bf5a8e59820164faafa6d7549f1d26f7b6fefcf77fb5f19f0c18018
|
7
|
+
data.tar.gz: 3bdfc3fcd212b0dacd8228c04c391324a67ec619c853b74255c063036a4748b4249abcb2e0052837c09428a42cd5fcbd7adb192278db760c1dd3ff491d79ef94
|
data/exe/deba
ADDED
data/lib/deba.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.6.0"
|
4
|
+
VERSION = "0.7.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
@@ -9,6 +9,7 @@ require "deba/stringifier"
|
|
9
9
|
require "deba/document"
|
10
10
|
require "deba/break"
|
11
11
|
require "deba/heading"
|
12
|
+
require "deba/list_item"
|
12
13
|
require "deba/paragraph"
|
13
14
|
require "deba/text_runner"
|
14
15
|
require "deba/extractor"
|
data/lib/deba/document.rb
CHANGED
data/lib/deba/extractor.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class Deba::Extractor
|
2
2
|
HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6)
|
3
3
|
BLOCK_INITIATING_TAGS = %w(article aside body blockquote dd dt header li nav ol p pre section td th ul)
|
4
|
-
ENHANCERS = { %w(b strong) => "*", %(i em) => "_" }
|
4
|
+
ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }
|
5
5
|
|
6
6
|
attr_reader :blocks
|
7
7
|
|
@@ -59,6 +59,16 @@ class Deba::Extractor
|
|
59
59
|
return
|
60
60
|
end
|
61
61
|
|
62
|
+
if node_name == 'li'
|
63
|
+
last_item = node.xpath('count(following-sibling::li)').to_i == 0
|
64
|
+
index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
|
65
|
+
@text_run.break(Deba::ListItem, last_item, index)
|
66
|
+
node.children.each { |n| process(n) }
|
67
|
+
@text_run.break(Deba::Paragraph)
|
68
|
+
|
69
|
+
return
|
70
|
+
end
|
71
|
+
|
62
72
|
#These tags terminate the current paragraph, if present, and start a new paragraph
|
63
73
|
if BLOCK_INITIATING_TAGS.include?(node_name)
|
64
74
|
@text_run.break(Deba::Paragraph)
|
data/lib/deba/list_item.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
class Deba::ListItem
|
2
|
+
attr_reader :segments
|
3
|
+
|
4
|
+
def initialize(segments, last, index)
|
5
|
+
@segments = segments
|
6
|
+
@last = last
|
7
|
+
@index = index
|
8
|
+
end
|
9
|
+
|
10
|
+
def to_s
|
11
|
+
prefix = if @index.nil?
|
12
|
+
"* "
|
13
|
+
else
|
14
|
+
"#{@index}. "
|
15
|
+
end
|
16
|
+
|
17
|
+
"#{prefix}#{Deba::Stringifier.new(@segments).stringify}\n#{"\n" if @last}"
|
18
|
+
end
|
19
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deba
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brenton "B-Train" Fletcher
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01
|
11
|
+
date: 2017-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -56,7 +56,8 @@ description: Deba takes a HTML document or fragment and extracts the text conten
|
|
56
56
|
into a plaintext format that is a strict subset of markdown.
|
57
57
|
email:
|
58
58
|
- i@bloople.net
|
59
|
-
executables:
|
59
|
+
executables:
|
60
|
+
- deba
|
60
61
|
extensions: []
|
61
62
|
extra_rdoc_files: []
|
62
63
|
files:
|
@@ -68,11 +69,13 @@ files:
|
|
68
69
|
- bin/console
|
69
70
|
- bin/setup
|
70
71
|
- deba.gemspec
|
72
|
+
- exe/deba
|
71
73
|
- lib/deba.rb
|
72
74
|
- lib/deba/break.rb
|
73
75
|
- lib/deba/document.rb
|
74
76
|
- lib/deba/extractor.rb
|
75
77
|
- lib/deba/heading.rb
|
78
|
+
- lib/deba/list_item.rb
|
76
79
|
- lib/deba/paragraph.rb
|
77
80
|
- lib/deba/stringifier.rb
|
78
81
|
- lib/deba/text_runner.rb
|