deba 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1018f89ff2e632d52757b6dbbd5ca81ca85ab48e
4
- data.tar.gz: 61a006763ca3fb46a8b32bbb503016c61942d2f5
3
+ metadata.gz: bc1db7aa56a4e3a035d8cbefe54cd49742faee24
4
+ data.tar.gz: 7d1f0a909f5675ae614792642711fdf94f865469
5
5
  SHA512:
6
- metadata.gz: 2d1e257c802b4f8b5f0a7e40afd2adfc39d4ae77ee489e5b7cfc939554f8d949d985a49e7f0412663db88c3cd2e9d6b8cf6a1f6792449c166aea857b9669a063
7
- data.tar.gz: f0333aa1253df9e03bbc496d2973ed845beb32abc1050b2c9c71be46fdf4c3f6a4ea40fe59f56385b2f697d7426d35376c9879741b3926fb69dab6d1b120fcf9
6
+ metadata.gz: 78ca7576ddd7698df8851acf773cf566fbb29ceed3748cc73d0b5844add6a6665e760adecb14207358a2217e59b9d48caa1727cd92e71fc65adaa976119f55af
7
+ data.tar.gz: b2f5628703302d6cad7c5f63a841c53a3e24c973e1d6bf37e3a8216a128c5b2d011ba346d8238b8a933d3cfa551012b8f5a5bbd5da663f207a6cf6583d9b8639
@@ -1,7 +1,7 @@
1
1
  require "nokogiri"
2
2
 
3
3
  module Deba
4
- VERSION = "0.12.0"
4
+ VERSION = "0.13.0"
5
5
  end
6
6
 
7
7
  require "deba/utils"
@@ -1,4 +1,6 @@
1
1
  class Deba::Document
2
+ BLOCKQUOTE = "> "
3
+
2
4
  attr_reader :content
3
5
 
4
6
  def initialize(extractor)
@@ -22,7 +24,7 @@ class Deba::Document
22
24
 
23
25
  @args.unshift(@segments)
24
26
  block = @block_type.new(*@args).to_a
25
- block.unshift("> ") if @extractor.in_blockquote?
27
+ block.unshift(BLOCKQUOTE) if @extractor.in_blockquote?
26
28
 
27
29
  @content << Deba::Stringifier.new(block).stringify
28
30
  end
@@ -1,7 +1,34 @@
1
1
  class Deba::Extractor
2
2
  HEADING_TAGS = %w(h1 h2 h3 h4 h5 h6)
3
- BLOCK_INITIATING_TAGS = %w(article aside body blockquote div dd dt header li nav ol p pre section td th ul)
3
+ BLOCK_INITIATING_TAGS = %w(
4
+ address
5
+ article
6
+ aside
7
+ body
8
+ blockquote
9
+ div
10
+ dd
11
+ dl
12
+ dt
13
+ figure
14
+ footer
15
+ header
16
+ li
17
+ main
18
+ nav
19
+ ol
20
+ p
21
+ pre
22
+ section
23
+ td
24
+ th
25
+ ul)
4
26
  ENHANCERS = { %w(b strong) => "*", %w(i em) => "_" }
27
+ SKIP_TAGS = %w(
28
+ head
29
+ style
30
+ script
31
+ )
5
32
 
6
33
  attr_reader :blocks
7
34
 
@@ -27,7 +54,7 @@ class Deba::Extractor
27
54
 
28
55
  node_name = node.name.downcase
29
56
 
30
- return if node_name == 'head'
57
+ return if SKIP_TAGS.include?(node_name)
31
58
 
32
59
  #Handle repeated brs by making a paragraph break
33
60
  if node_name == 'br'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brenton "B-Train" Fletcher