deba 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f39bcdf2a7a1d34172bb6c68196532c119c13fdb
4
- data.tar.gz: e911b81e44f25984e658abddabed6c9af80d895b
3
+ metadata.gz: 2c8a544deca7b66f05de8209b957f2ce5c07f1e2
4
+ data.tar.gz: 3ec0e8bb25b23ad2145867dc130ea9a9f475f64b
5
5
  SHA512:
6
- metadata.gz: b1f78c6a8cdde9ee7f7608bac295a262bbe7631c147ad6f43f541d02a0841de46615370cc7b48de20e150236ff1cb124384a80c4cd1ffe52e520bf3336929ba5
7
- data.tar.gz: 5fad6708011e66e11825e32e3edd01508dcc4e8cfe6eed2719c86abb08a2ccb1dc2f2c1d057a9494e8cce42d94d9cba16ee668f051b28bd24253f65967b3e596
6
+ metadata.gz: cdd042c6214229de58365925f74b435e14cc9f54f9ca40304189354706c7182a805ccf971bf5a8e59820164faafa6d7549f1d26f7b6fefcf77fb5f19f0c18018
7
+ data.tar.gz: 3bdfc3fcd212b0dacd8228c04c391324a67ec619c853b74255c063036a4748b4249abcb2e0052837c09428a42cd5fcbd7adb192278db760c1dd3ff491d79ef94
exe/deba ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "deba"
5
+
6
+ print Deba.extract(File.read(ARGV.first))
lib/deba.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require "nokogiri"
2
2
 
3
3
  module Deba
4
- VERSION = "0.6.0"
4
+ VERSION = "0.7.0"
5
5
  end
6
6
 
7
7
  require "deba/utils"
@@ -9,6 +9,7 @@ require "deba/stringifier"
9
9
  require "deba/document"
10
10
  require "deba/break"
11
11
  require "deba/heading"
12
+ require "deba/list_item"
12
13
  require "deba/paragraph"
13
14
  require "deba/text_runner"
14
15
  require "deba/extractor"
lib/deba/document.rb CHANGED
@@ -10,6 +10,6 @@ class Deba::Document
10
10
  end
11
11
 
12
12
  def to_s
13
- @blocks.map { |block| block.to_s }.join.strip
13
+ @blocks.map { |block| block.to_s }.join.chomp("\n")
14
14
  end
15
15
  end
lib/deba/extractor.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  class Deba::Extractor
2
2
  HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6)
3
3
  BLOCK_INITIATING_TAGS = %w(article aside body blockquote dd dt header li nav ol p pre section td th ul)
4
- ENHANCERS = { %w(b strong) => "*", %(i em) => "_" }
4
+ ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }
5
5
 
6
6
  attr_reader :blocks
7
7
 
@@ -59,6 +59,16 @@ class Deba::Extractor
59
59
  return
60
60
  end
61
61
 
62
+ if node_name == 'li'
63
+ last_item = node.xpath('count(following-sibling::li)').to_i == 0
64
+ index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
65
+ @text_run.break(Deba::ListItem, last_item, index)
66
+ node.children.each { |n| process(n) }
67
+ @text_run.break(Deba::Paragraph)
68
+
69
+ return
70
+ end
71
+
62
72
  #These tags terminate the current paragraph, if present, and start a new paragraph
63
73
  if BLOCK_INITIATING_TAGS.include?(node_name)
64
74
  @text_run.break(Deba::Paragraph)
lib/deba/list_item.rb ADDED
@@ -0,0 +1,19 @@
1
+ class Deba::ListItem
2
+ attr_reader :segments
3
+
4
+ def initialize(segments, last, index)
5
+ @segments = segments
6
+ @last = last
7
+ @index = index
8
+ end
9
+
10
+ def to_s
11
+ prefix = if @index.nil?
12
+ "* "
13
+ else
14
+ "#{@index}. "
15
+ end
16
+
17
+ "#{prefix}#{Deba::Stringifier.new(@segments).stringify}\n#{"\n" if @last}"
18
+ end
19
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brenton "B-Train" Fletcher
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-30 00:00:00.000000000 Z
11
+ date: 2017-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -56,7 +56,8 @@ description: Deba takes a HTML document or fragment and extracts the text conten
56
56
  into a plaintext format that is a strict subset of markdown.
57
57
  email:
58
58
  - i@bloople.net
59
- executables: []
59
+ executables:
60
+ - deba
60
61
  extensions: []
61
62
  extra_rdoc_files: []
62
63
  files:
@@ -68,11 +69,13 @@ files:
68
69
  - bin/console
69
70
  - bin/setup
70
71
  - deba.gemspec
72
+ - exe/deba
71
73
  - lib/deba.rb
72
74
  - lib/deba/break.rb
73
75
  - lib/deba/document.rb
74
76
  - lib/deba/extractor.rb
75
77
  - lib/deba/heading.rb
78
+ - lib/deba/list_item.rb
76
79
  - lib/deba/paragraph.rb
77
80
  - lib/deba/stringifier.rb
78
81
  - lib/deba/text_runner.rb