article_json 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f34cc736d928d12ee64d0181a15d9b69b2bdfc6c89e76c35ffd79cb6a8acb8d5
4
- data.tar.gz: 74c2c855413da0691866fe54d1927b4342bb5409a826807821866613203f3db9
3
+ metadata.gz: b7f9acdba2ed14993cb2ce87317b205c65992e79ed5662d79d6d3fb357908ff1
4
+ data.tar.gz: b7638f1a409aa31a52a911001684eeda751f254ecb357b4907710f24b97cbf95
5
5
  SHA512:
6
- metadata.gz: ec2b0a1718c9f57dd278ce4e11bcca41a84353b3594be06599204b838701199ecaf5e21bb36921ea68c98c7fa7083d2cd8f939fb7253b07305aa4258bbf47c25
7
- data.tar.gz: 381aad7bba231456aa45bf4f512bd2fa788dffa875b1eccf5a77d859cff28d5b061eb3b7a08c0e51b0915a57c9695c8113d07966b313be0d9ca00ad8759f2469
6
+ metadata.gz: 297a7a8f4445d1525803cb3c57cad514e43cedc2c3c354d1c98d5fce603c78908dd7f3173f3879ab2f00cbce6c06b2c71c5a1261a046613a25f59fbfeb8cd6c4
7
+ data.tar.gz: af71578a715e7245563d3e204e9c767c45adbe56e5697ab320b7c736ad18259bb7111a90453a63d1ac8bd75b7139616823a35d6a039f16777435cb17aa848696
data/CHANGELOG.md CHANGED
@@ -1,4 +1,7 @@
1
1
  # Changelog
2
+ ## 0.3.6 - 2019/8/6
3
+ - **Improvement** Added tags support in text_box element.
4
+
2
5
  ## 0.3.5 - 2018/12/12
3
6
  - **Improvements** to import and export image links from Google Docs
4
7
  - Import image `href` from caption text using a custom tag
@@ -1,14 +1,16 @@
1
1
  module ArticleJSON
2
2
  module Elements
3
3
  class TextBox < Base
4
- attr_reader :content, :float
4
+ attr_reader :content, :float, :tags
5
5
 
6
6
  # @param [Array[Paragraph|Heading|List]] content
7
7
  # @param [Symbol] float
8
- def initialize(content:, float: nil)
8
+ # @param [Array] tags
9
+ def initialize(content:, float: nil, tags: [])
9
10
  @type = :text_box
10
11
  @content = content
11
12
  @float = float
13
+ @tags = tags
12
14
  end
13
15
 
14
16
  # Hash representation of this text box element
@@ -18,6 +20,7 @@ module ArticleJSON
18
20
  type: type,
19
21
  float: float,
20
22
  content: content.map(&:to_h),
23
+ tags: tags,
21
24
  }
22
25
  end
23
26
 
@@ -27,7 +30,8 @@ module ArticleJSON
27
30
  def parse_hash(hash)
28
31
  new(
29
32
  content: parse_hash_list(hash[:content]),
30
- float: hash[:float]&.to_sym
33
+ float: hash[:float]&.to_sym,
34
+ tags: hash[:tags]
31
35
  )
32
36
  end
33
37
  end
@@ -19,7 +19,7 @@ module ArticleJSON
19
19
  # @return [Nokogiri::XML::NodeSet]
20
20
  def figure_node
21
21
  create_element(:figure, node_opts) do |figure|
22
- node = @element.href.present? ? href_node : image_node
22
+ node = @element&.href ? href_node : image_node
23
23
  figure.add_child(node)
24
24
  if @element.caption&.any?
25
25
  figure.add_child(caption_node(:figcaption))
@@ -17,6 +17,14 @@ module ArticleJSON
17
17
  node.inner_text.strip.downcase == text.strip.downcase
18
18
  end
19
19
 
20
+ # Check if the first word of the node text equals a certain text (case-insensitive)
21
+ # @param [String] text
22
+ # @return [Boolean]
23
+ def begins_with?(text)
24
+ first_word = node.inner_text.strip.downcase.split(' ').first
25
+ first_word == text.strip.downcase
26
+ end
27
+
20
28
  # Check if the node is empty, i.e. not containing any text
21
29
  # Given that images are the only nodes without text, we have to make
22
30
  # sure that it's not an image.
@@ -65,7 +73,7 @@ module ArticleJSON
65
73
  # @return [Boolean]
66
74
  def text_box?
67
75
  return @is_text_box if defined? @is_text_box
68
- @is_text_box = has_text?('textbox:') || has_text?('highlight:')
76
+ @is_text_box = begins_with?('textbox:') || begins_with?('highlight:')
69
77
  end
70
78
 
71
79
  # Check if the node starts a quote
@@ -83,7 +83,11 @@ module ArticleJSON
83
83
  # @return [ArticleJSON::Elements::TextBox]
84
84
  def parse_text_box
85
85
  TextBoxParser
86
- .new(nodes: nodes_until_hr, css_analyzer: @css_analyzer)
86
+ .new(
87
+ type_node: @current_node.node,
88
+ nodes: nodes_until_hr,
89
+ css_analyzer: @css_analyzer
90
+ )
87
91
  .element
88
92
  end
89
93
 
@@ -5,14 +5,18 @@ module ArticleJSON
5
5
  class TextBoxParser
6
6
  include Shared::Float
7
7
 
8
+ # @param [Nokogiri::HTML::Node] type_node Document node that states
9
+ # that this is a textbox.
10
+ # May contain tags, too.
8
11
  # @param [Array[Nokogiri::HTML::Node]] nodes
9
12
  # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
10
- def initialize(nodes:, css_analyzer:)
13
+ def initialize(type_node: ,nodes:, css_analyzer:)
11
14
  @nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
12
15
  @css_analyzer = css_analyzer
13
16
 
14
17
  # First node of the text box indicates floating behavior
15
18
  @float_node = @nodes.first
19
+ @type_node = type_node
16
20
  end
17
21
 
18
22
  # Parse the text box's nodes to get a list of sub elements
@@ -22,10 +26,23 @@ module ArticleJSON
22
26
  @nodes.map { |node| parse_sub_node(node) }.compact
23
27
  end
24
28
 
29
+ # Extract any potential tags, specified in brackets after the Textbox definition
30
+ # @return [Array[String]]
31
+ def tags
32
+ match = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/
33
+ .match(@type_node.inner_text)
34
+ return [] unless match
35
+ match[:tags].split(' ')
36
+ end
37
+
25
38
  # Element representation of this text box
26
39
  # @return [ArticleJSON::Elements::TextBox]
27
40
  def element
28
- ArticleJSON::Elements::TextBox.new(float: float, content: content)
41
+ ArticleJSON::Elements::TextBox.new(
42
+ float: float,
43
+ content: content,
44
+ tags: tags
45
+ )
29
46
  end
30
47
 
31
48
  private
@@ -1,3 +1,3 @@
1
1
  module ArticleJSON
2
- VERSION = '0.3.5'
2
+ VERSION = '0.3.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: article_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Sager
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2018-12-12 00:00:00.000000000 Z
13
+ date: 2019-08-06 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
@@ -229,8 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
229
  - !ruby/object:Gem::Version
230
230
  version: '0'
231
231
  requirements: []
232
- rubyforge_project:
233
- rubygems_version: 2.7.7
232
+ rubygems_version: 3.0.4
234
233
  signing_key:
235
234
  specification_version: 4
236
235
  summary: JSON Format for News Articles & Ruby Gem