article_json 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/article_json/elements/text_box.rb +7 -3
- data/lib/article_json/export/common/html/elements/image.rb +1 -1
- data/lib/article_json/import/google_doc/html/node_analyzer.rb +9 -1
- data/lib/article_json/import/google_doc/html/parser.rb +5 -1
- data/lib/article_json/import/google_doc/html/text_box_parser.rb +19 -2
- data/lib/article_json/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7f9acdba2ed14993cb2ce87317b205c65992e79ed5662d79d6d3fb357908ff1
|
4
|
+
data.tar.gz: b7638f1a409aa31a52a911001684eeda751f254ecb357b4907710f24b97cbf95
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 297a7a8f4445d1525803cb3c57cad514e43cedc2c3c354d1c98d5fce603c78908dd7f3173f3879ab2f00cbce6c06b2c71c5a1261a046613a25f59fbfeb8cd6c4
|
7
|
+
data.tar.gz: af71578a715e7245563d3e204e9c767c45adbe56e5697ab320b7c736ad18259bb7111a90453a63d1ac8bd75b7139616823a35d6a039f16777435cb17aa848696
|
data/CHANGELOG.md
CHANGED
data/lib/article_json/elements/text_box.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
module ArticleJSON
|
2
2
|
module Elements
|
3
3
|
class TextBox < Base
|
4
|
-
attr_reader :content, :float
|
4
|
+
attr_reader :content, :float, :tags
|
5
5
|
|
6
6
|
# @param [Array[Paragraph|Heading|List]] content
|
7
7
|
# @param [Symbol] float
|
8
|
-
|
8
|
+
# @param [Array] tags
|
9
|
+
def initialize(content:, float: nil, tags: [])
|
9
10
|
@type = :text_box
|
10
11
|
@content = content
|
11
12
|
@float = float
|
13
|
+
@tags = tags
|
12
14
|
end
|
13
15
|
|
14
16
|
# Hash representation of this text box element
|
@@ -18,6 +20,7 @@ module ArticleJSON
|
|
18
20
|
type: type,
|
19
21
|
float: float,
|
20
22
|
content: content.map(&:to_h),
|
23
|
+
tags: tags,
|
21
24
|
}
|
22
25
|
end
|
23
26
|
|
@@ -27,7 +30,8 @@ module ArticleJSON
|
|
27
30
|
def parse_hash(hash)
|
28
31
|
new(
|
29
32
|
content: parse_hash_list(hash[:content]),
|
30
|
-
float: hash[:float]&.to_sym
|
33
|
+
float: hash[:float]&.to_sym,
|
34
|
+
tags: hash[:tags]
|
31
35
|
)
|
32
36
|
end
|
33
37
|
end
|
data/lib/article_json/export/common/html/elements/image.rb
CHANGED
@@ -19,7 +19,7 @@ module ArticleJSON
|
|
19
19
|
# @return [Nokogiri::XML::NodeSet]
|
20
20
|
def figure_node
|
21
21
|
create_element(:figure, node_opts) do |figure|
|
22
|
-
node = @element
|
22
|
+
node = @element&.href ? href_node : image_node
|
23
23
|
figure.add_child(node)
|
24
24
|
if @element.caption&.any?
|
25
25
|
figure.add_child(caption_node(:figcaption))
|
data/lib/article_json/import/google_doc/html/node_analyzer.rb
CHANGED
@@ -17,6 +17,14 @@ module ArticleJSON
|
|
17
17
|
node.inner_text.strip.downcase == text.strip.downcase
|
18
18
|
end
|
19
19
|
|
20
|
+
# Check if the node text begins with a certain text
|
21
|
+
# @param [String]
|
22
|
+
# @return [Boolean]
|
23
|
+
def begins_with?(text)
|
24
|
+
first_word = node.inner_text.strip.downcase.split(' ').first
|
25
|
+
first_word == text.strip.downcase
|
26
|
+
end
|
27
|
+
|
20
28
|
# Check if the node is empty, i.e. not containing any text
|
21
29
|
# Given that images are the only nodes without text, we have to make
|
22
30
|
# sure that it's not an image.
|
@@ -65,7 +73,7 @@ module ArticleJSON
|
|
65
73
|
# @return [Boolean]
|
66
74
|
def text_box?
|
67
75
|
return @is_text_box if defined? @is_text_box
|
68
|
-
@is_text_box =
|
76
|
+
@is_text_box = begins_with?('textbox:') || begins_with?('highlight:')
|
69
77
|
end
|
70
78
|
|
71
79
|
# Check if the node starts a quote
|
data/lib/article_json/import/google_doc/html/parser.rb
CHANGED
@@ -83,7 +83,11 @@ module ArticleJSON
|
|
83
83
|
# @return [ArticleJSON::Elements::TextBox]
|
84
84
|
def parse_text_box
|
85
85
|
TextBoxParser
|
86
|
-
.new(
|
86
|
+
.new(
|
87
|
+
type_node: @current_node.node,
|
88
|
+
nodes: nodes_until_hr,
|
89
|
+
css_analyzer: @css_analyzer
|
90
|
+
)
|
87
91
|
.element
|
88
92
|
end
|
89
93
|
|
data/lib/article_json/import/google_doc/html/text_box_parser.rb
CHANGED
@@ -5,14 +5,18 @@ module ArticleJSON
|
|
5
5
|
class TextBoxParser
|
6
6
|
include Shared::Float
|
7
7
|
|
8
|
+
# @param [Nokogiri::HTML::Node] type_node Document node that states
|
9
|
+
# that this is a textbox.
|
10
|
+
# May contain tags, too.
|
8
11
|
# @param [Array[Nokogiri::HTML::Node]] nodes
|
9
12
|
# @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
|
10
|
-
def initialize(nodes:, css_analyzer:)
|
13
|
+
def initialize(type_node: ,nodes:, css_analyzer:)
|
11
14
|
@nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
|
12
15
|
@css_analyzer = css_analyzer
|
13
16
|
|
14
17
|
# First node of the text box indicates floating behavior
|
15
18
|
@float_node = @nodes.first
|
19
|
+
@type_node = type_node
|
16
20
|
end
|
17
21
|
|
18
22
|
# Parse the text box's nodes to get a list of sub elements
|
@@ -22,10 +26,23 @@ module ArticleJSON
|
|
22
26
|
@nodes.map { |node| parse_sub_node(node) }.compact
|
23
27
|
end
|
24
28
|
|
29
|
+
# Extract any potential tags, specified in brackets after the Textbox definition
|
30
|
+
# @return [Array[Symbol]]
|
31
|
+
def tags
|
32
|
+
match = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/
|
33
|
+
.match(@type_node.inner_text)
|
34
|
+
return [] unless match
|
35
|
+
match[:tags].split(' ')
|
36
|
+
end
|
37
|
+
|
25
38
|
# Hash representation of this text box
|
26
39
|
# @return [ArticleJSON::Elements::TextBox]
|
27
40
|
def element
|
28
|
-
ArticleJSON::Elements::TextBox.new(
|
41
|
+
ArticleJSON::Elements::TextBox.new(
|
42
|
+
float: float,
|
43
|
+
content: content,
|
44
|
+
tags: tags
|
45
|
+
)
|
29
46
|
end
|
30
47
|
|
31
48
|
private
|
data/lib/article_json/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: article_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Sager
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2019-08-06 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -229,8 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
229
229
|
- !ruby/object:Gem::Version
|
230
230
|
version: '0'
|
231
231
|
requirements: []
|
232
|
-
|
233
|
-
rubygems_version: 2.7.7
|
232
|
+
rubygems_version: 3.0.4
|
234
233
|
signing_key:
|
235
234
|
specification_version: 4
|
236
235
|
summary: JSON Format for News Articles & Ruby Gem
|