article_json 0.3.5 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f34cc736d928d12ee64d0181a15d9b69b2bdfc6c89e76c35ffd79cb6a8acb8d5
4
- data.tar.gz: 74c2c855413da0691866fe54d1927b4342bb5409a826807821866613203f3db9
3
+ metadata.gz: b7f9acdba2ed14993cb2ce87317b205c65992e79ed5662d79d6d3fb357908ff1
4
+ data.tar.gz: b7638f1a409aa31a52a911001684eeda751f254ecb357b4907710f24b97cbf95
5
5
  SHA512:
6
- metadata.gz: ec2b0a1718c9f57dd278ce4e11bcca41a84353b3594be06599204b838701199ecaf5e21bb36921ea68c98c7fa7083d2cd8f939fb7253b07305aa4258bbf47c25
7
- data.tar.gz: 381aad7bba231456aa45bf4f512bd2fa788dffa875b1eccf5a77d859cff28d5b061eb3b7a08c0e51b0915a57c9695c8113d07966b313be0d9ca00ad8759f2469
6
+ metadata.gz: 297a7a8f4445d1525803cb3c57cad514e43cedc2c3c354d1c98d5fce603c78908dd7f3173f3879ab2f00cbce6c06b2c71c5a1261a046613a25f59fbfeb8cd6c4
7
+ data.tar.gz: af71578a715e7245563d3e204e9c767c45adbe56e5697ab320b7c736ad18259bb7111a90453a63d1ac8bd75b7139616823a35d6a039f16777435cb17aa848696
data/CHANGELOG.md CHANGED
@@ -1,4 +1,7 @@
1
1
  # Changelog
2
+ ## 0.3.6 - 2019/8/6
3
+ - **Improvement** Added tags support in text_box element.
4
+
2
5
  ## 0.3.5 - 2018/12/12
3
6
  - **Improvements** to import and export image links from Google Docs
4
7
  - Import image `href` from caption text using a custom tag
@@ -1,14 +1,16 @@
1
1
  module ArticleJSON
2
2
  module Elements
3
3
  class TextBox < Base
4
- attr_reader :content, :float
4
+ attr_reader :content, :float, :tags
5
5
 
6
6
  # @param [Array[Paragraph|Heading|List]] content
7
7
  # @param [Symbol] float
8
- def initialize(content:, float: nil)
8
+ # @param [Array] tags
9
+ def initialize(content:, float: nil, tags: [])
9
10
  @type = :text_box
10
11
  @content = content
11
12
  @float = float
13
+ @tags = tags
12
14
  end
13
15
 
14
16
  # Hash representation of this text box element
@@ -18,6 +20,7 @@ module ArticleJSON
18
20
  type: type,
19
21
  float: float,
20
22
  content: content.map(&:to_h),
23
+ tags: tags,
21
24
  }
22
25
  end
23
26
 
@@ -27,7 +30,8 @@ module ArticleJSON
27
30
  def parse_hash(hash)
28
31
  new(
29
32
  content: parse_hash_list(hash[:content]),
30
- float: hash[:float]&.to_sym
33
+ float: hash[:float]&.to_sym,
34
+ tags: hash[:tags]
31
35
  )
32
36
  end
33
37
  end
@@ -19,7 +19,7 @@ module ArticleJSON
19
19
  # @return [Nokogiri::XML::NodeSet]
20
20
  def figure_node
21
21
  create_element(:figure, node_opts) do |figure|
22
- node = @element.href.present? ? href_node : image_node
22
+ node = @element&.href ? href_node : image_node
23
23
  figure.add_child(node)
24
24
  if @element.caption&.any?
25
25
  figure.add_child(caption_node(:figcaption))
@@ -17,6 +17,14 @@ module ArticleJSON
17
17
  node.inner_text.strip.downcase == text.strip.downcase
18
18
  end
19
19
 
20
+ # Check if the node text begins with a certain text
21
+ # @param [String] text
22
+ # @return [Boolean]
23
+ def begins_with?(text)
24
+ first_word = node.inner_text.strip.downcase.split(' ').first
25
+ first_word == text.strip.downcase
26
+ end
27
+
20
28
  # Check if the node is empty, i.e. not containing any text
21
29
  # Given that images are the only nodes without text, we have to make
22
30
  # sure that it's not an image.
@@ -65,7 +73,7 @@ module ArticleJSON
65
73
  # @return [Boolean]
66
74
  def text_box?
67
75
  return @is_text_box if defined? @is_text_box
68
- @is_text_box = has_text?('textbox:') || has_text?('highlight:')
76
+ @is_text_box = begins_with?('textbox:') || begins_with?('highlight:')
69
77
  end
70
78
 
71
79
  # Check if the node starts a quote
@@ -83,7 +83,11 @@ module ArticleJSON
83
83
  # @return [ArticleJSON::Elements::TextBox]
84
84
  def parse_text_box
85
85
  TextBoxParser
86
- .new(nodes: nodes_until_hr, css_analyzer: @css_analyzer)
86
+ .new(
87
+ type_node: @current_node.node,
88
+ nodes: nodes_until_hr,
89
+ css_analyzer: @css_analyzer
90
+ )
87
91
  .element
88
92
  end
89
93
 
@@ -5,14 +5,18 @@ module ArticleJSON
5
5
  class TextBoxParser
6
6
  include Shared::Float
7
7
 
8
+ # @param [Nokogiri::HTML::Node] type_node Document node that states
9
+ # that this is a textbox.
10
+ # May contain tags, too.
8
11
  # @param [Array[Nokogiri::HTML::Node]] nodes
9
12
  # @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
10
- def initialize(nodes:, css_analyzer:)
13
+ def initialize(type_node: ,nodes:, css_analyzer:)
11
14
  @nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
12
15
  @css_analyzer = css_analyzer
13
16
 
14
17
  # First node of the text box indicates floating behavior
15
18
  @float_node = @nodes.first
19
+ @type_node = type_node
16
20
  end
17
21
 
18
22
  # Parse the text box's nodes to get a list of sub elements
@@ -22,10 +26,23 @@ module ArticleJSON
22
26
  @nodes.map { |node| parse_sub_node(node) }.compact
23
27
  end
24
28
 
29
+ # Extract any potential tags, specified in brackets after the Textbox definition
30
+ # @return [Array[Symbol]]
31
+ def tags
32
+ match = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/
33
+ .match(@type_node.inner_text)
34
+ return [] unless match
35
+ match[:tags].split(' ')
36
+ end
37
+
25
38
  # Hash representation of this text box
26
39
  # @return [ArticleJSON::Elements::TextBox]
27
40
  def element
28
- ArticleJSON::Elements::TextBox.new(float: float, content: content)
41
+ ArticleJSON::Elements::TextBox.new(
42
+ float: float,
43
+ content: content,
44
+ tags: tags
45
+ )
29
46
  end
30
47
 
31
48
  private
@@ -1,3 +1,3 @@
1
1
  module ArticleJSON
2
- VERSION = '0.3.5'
2
+ VERSION = '0.3.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: article_json
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Sager
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2018-12-12 00:00:00.000000000 Z
13
+ date: 2019-08-06 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
@@ -229,8 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
229
  - !ruby/object:Gem::Version
230
230
  version: '0'
231
231
  requirements: []
232
- rubyforge_project:
233
- rubygems_version: 2.7.7
232
+ rubygems_version: 3.0.4
234
233
  signing_key:
235
234
  specification_version: 4
236
235
  summary: JSON Format for News Articles & Ruby Gem