deba 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f39bcdf2a7a1d34172bb6c68196532c119c13fdb
4
- data.tar.gz: e911b81e44f25984e658abddabed6c9af80d895b
3
+ metadata.gz: 2c8a544deca7b66f05de8209b957f2ce5c07f1e2
4
+ data.tar.gz: 3ec0e8bb25b23ad2145867dc130ea9a9f475f64b
5
5
  SHA512:
6
- metadata.gz: b1f78c6a8cdde9ee7f7608bac295a262bbe7631c147ad6f43f541d02a0841de46615370cc7b48de20e150236ff1cb124384a80c4cd1ffe52e520bf3336929ba5
7
- data.tar.gz: 5fad6708011e66e11825e32e3edd01508dcc4e8cfe6eed2719c86abb08a2ccb1dc2f2c1d057a9494e8cce42d94d9cba16ee668f051b28bd24253f65967b3e596
6
+ metadata.gz: cdd042c6214229de58365925f74b435e14cc9f54f9ca40304189354706c7182a805ccf971bf5a8e59820164faafa6d7549f1d26f7b6fefcf77fb5f19f0c18018
7
+ data.tar.gz: 3bdfc3fcd212b0dacd8228c04c391324a67ec619c853b74255c063036a4748b4249abcb2e0052837c09428a42cd5fcbd7adb192278db760c1dd3ff491d79ef94
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "deba"
5
+
6
+ print Deba.extract(File.read(ARGV.first))
@@ -1,7 +1,7 @@
1
1
  require "nokogiri"
2
2
 
3
3
  module Deba
4
- VERSION = "0.6.0"
4
+ VERSION = "0.7.0"
5
5
  end
6
6
 
7
7
  require "deba/utils"
@@ -9,6 +9,7 @@ require "deba/stringifier"
9
9
  require "deba/document"
10
10
  require "deba/break"
11
11
  require "deba/heading"
12
+ require "deba/list_item"
12
13
  require "deba/paragraph"
13
14
  require "deba/text_runner"
14
15
  require "deba/extractor"
@@ -10,6 +10,6 @@ class Deba::Document
10
10
  end
11
11
 
12
12
  def to_s
13
- @blocks.map { |block| block.to_s }.join.strip
13
+ @blocks.map { |block| block.to_s }.join.chomp("\n")
14
14
  end
15
15
  end
@@ -1,7 +1,7 @@
1
1
  class Deba::Extractor
2
2
  HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6)
3
3
  BLOCK_INITIATING_TAGS = %w(article aside body blockquote dd dt header li nav ol p pre section td th ul)
4
- ENHANCERS = { %w(b strong) => "*", %(i em) => "_" }
4
+ ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }
5
5
 
6
6
  attr_reader :blocks
7
7
 
@@ -59,6 +59,16 @@ class Deba::Extractor
59
59
  return
60
60
  end
61
61
 
62
+ if node_name == 'li'
63
+ last_item = node.xpath('count(following-sibling::li)').to_i == 0
64
+ index = node.xpath('boolean(ancestor::ol)') ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
65
+ @text_run.break(Deba::ListItem, last_item, index)
66
+ node.children.each { |n| process(n) }
67
+ @text_run.break(Deba::Paragraph)
68
+
69
+ return
70
+ end
71
+
62
72
  #These tags terminate the current paragraph, if present, and start a new paragraph
63
73
  if BLOCK_INITIATING_TAGS.include?(node_name)
64
74
  @text_run.break(Deba::Paragraph)
@@ -0,0 +1,19 @@
1
+ class Deba::ListItem
2
+ attr_reader :segments
3
+
4
+ def initialize(segments, last, index)
5
+ @segments = segments
6
+ @last = last
7
+ @index = index
8
+ end
9
+
10
+ def to_s
11
+ prefix = if @index.nil?
12
+ "* "
13
+ else
14
+ "#{@index}. "
15
+ end
16
+
17
+ "#{prefix}#{Deba::Stringifier.new(@segments).stringify}\n#{"\n" if @last}"
18
+ end
19
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brenton "B-Train" Fletcher
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-30 00:00:00.000000000 Z
11
+ date: 2017-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -56,7 +56,8 @@ description: Deba takes a HTML document or fragment and extracts the text conten
56
56
  into a plaintext format that is a strict subset of markdown.
57
57
  email:
58
58
  - i@bloople.net
59
- executables: []
59
+ executables:
60
+ - deba
60
61
  extensions: []
61
62
  extra_rdoc_files: []
62
63
  files:
@@ -68,11 +69,13 @@ files:
68
69
  - bin/console
69
70
  - bin/setup
70
71
  - deba.gemspec
72
+ - exe/deba
71
73
  - lib/deba.rb
72
74
  - lib/deba/break.rb
73
75
  - lib/deba/document.rb
74
76
  - lib/deba/extractor.rb
75
77
  - lib/deba/heading.rb
78
+ - lib/deba/list_item.rb
76
79
  - lib/deba/paragraph.rb
77
80
  - lib/deba/stringifier.rb
78
81
  - lib/deba/text_runner.rb