article_json 0.3.5 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/article_json/elements/text_box.rb +7 -3
- data/lib/article_json/export/common/html/elements/image.rb +1 -1
- data/lib/article_json/import/google_doc/html/node_analyzer.rb +9 -1
- data/lib/article_json/import/google_doc/html/parser.rb +5 -1
- data/lib/article_json/import/google_doc/html/text_box_parser.rb +19 -2
- data/lib/article_json/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b7f9acdba2ed14993cb2ce87317b205c65992e79ed5662d79d6d3fb357908ff1
|
4
|
+
data.tar.gz: b7638f1a409aa31a52a911001684eeda751f254ecb357b4907710f24b97cbf95
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 297a7a8f4445d1525803cb3c57cad514e43cedc2c3c354d1c98d5fce603c78908dd7f3173f3879ab2f00cbce6c06b2c71c5a1261a046613a25f59fbfeb8cd6c4
|
7
|
+
data.tar.gz: af71578a715e7245563d3e204e9c767c45adbe56e5697ab320b7c736ad18259bb7111a90453a63d1ac8bd75b7139616823a35d6a039f16777435cb17aa848696
|
data/CHANGELOG.md
CHANGED
data/lib/article_json/elements/text_box.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
module ArticleJSON
|
2
2
|
module Elements
|
3
3
|
class TextBox < Base
|
4
|
-
attr_reader :content, :float
|
4
|
+
attr_reader :content, :float, :tags
|
5
5
|
|
6
6
|
# @param [Array[Paragraph|Heading|List]] content
|
7
7
|
# @param [Symbol] float
|
8
|
-
|
8
|
+
# @param [Array] tags
|
9
|
+
def initialize(content:, float: nil, tags: [])
|
9
10
|
@type = :text_box
|
10
11
|
@content = content
|
11
12
|
@float = float
|
13
|
+
@tags = tags
|
12
14
|
end
|
13
15
|
|
14
16
|
# Hash representation of this text box element
|
@@ -18,6 +20,7 @@ module ArticleJSON
|
|
18
20
|
type: type,
|
19
21
|
float: float,
|
20
22
|
content: content.map(&:to_h),
|
23
|
+
tags: tags,
|
21
24
|
}
|
22
25
|
end
|
23
26
|
|
@@ -27,7 +30,8 @@ module ArticleJSON
|
|
27
30
|
def parse_hash(hash)
|
28
31
|
new(
|
29
32
|
content: parse_hash_list(hash[:content]),
|
30
|
-
float: hash[:float]&.to_sym
|
33
|
+
float: hash[:float]&.to_sym,
|
34
|
+
tags: hash[:tags]
|
31
35
|
)
|
32
36
|
end
|
33
37
|
end
|
data/lib/article_json/export/common/html/elements/image.rb
CHANGED
@@ -19,7 +19,7 @@ module ArticleJSON
|
|
19
19
|
# @return [Nokogiri::XML::NodeSet]
|
20
20
|
def figure_node
|
21
21
|
create_element(:figure, node_opts) do |figure|
|
22
|
-
node = @element
|
22
|
+
node = @element&.href ? href_node : image_node
|
23
23
|
figure.add_child(node)
|
24
24
|
if @element.caption&.any?
|
25
25
|
figure.add_child(caption_node(:figcaption))
|
data/lib/article_json/import/google_doc/html/node_analyzer.rb
CHANGED
@@ -17,6 +17,14 @@ module ArticleJSON
|
|
17
17
|
node.inner_text.strip.downcase == text.strip.downcase
|
18
18
|
end
|
19
19
|
|
20
|
+
# Check if the node text begins with a certain text
|
21
|
+
# @param [String]
|
22
|
+
# @return [Boolean]
|
23
|
+
def begins_with?(text)
|
24
|
+
first_word = node.inner_text.strip.downcase.split(' ').first
|
25
|
+
first_word == text.strip.downcase
|
26
|
+
end
|
27
|
+
|
20
28
|
# Check if the node is empty, i.e. not containing any text
|
21
29
|
# Given that images are the only nodes without text, we have to make
|
22
30
|
# sure that it's not an image.
|
@@ -65,7 +73,7 @@ module ArticleJSON
|
|
65
73
|
# @return [Boolean]
|
66
74
|
def text_box?
|
67
75
|
return @is_text_box if defined? @is_text_box
|
68
|
-
@is_text_box =
|
76
|
+
@is_text_box = begins_with?('textbox:') || begins_with?('highlight:')
|
69
77
|
end
|
70
78
|
|
71
79
|
# Check if the node starts a quote
|
data/lib/article_json/import/google_doc/html/parser.rb
CHANGED
@@ -83,7 +83,11 @@ module ArticleJSON
|
|
83
83
|
# @return [ArticleJSON::Elements::TextBox]
|
84
84
|
def parse_text_box
|
85
85
|
TextBoxParser
|
86
|
-
.new(
|
86
|
+
.new(
|
87
|
+
type_node: @current_node.node,
|
88
|
+
nodes: nodes_until_hr,
|
89
|
+
css_analyzer: @css_analyzer
|
90
|
+
)
|
87
91
|
.element
|
88
92
|
end
|
89
93
|
|
data/lib/article_json/import/google_doc/html/text_box_parser.rb
CHANGED
@@ -5,14 +5,18 @@ module ArticleJSON
|
|
5
5
|
class TextBoxParser
|
6
6
|
include Shared::Float
|
7
7
|
|
8
|
+
# @param [Nokogiri::HTML::Node] type_node Document node that states
|
9
|
+
# that this is a textbox.
|
10
|
+
# May contain tags, too.
|
8
11
|
# @param [Array[Nokogiri::HTML::Node]] nodes
|
9
12
|
# @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
|
10
|
-
def initialize(nodes:, css_analyzer:)
|
13
|
+
def initialize(type_node: ,nodes:, css_analyzer:)
|
11
14
|
@nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
|
12
15
|
@css_analyzer = css_analyzer
|
13
16
|
|
14
17
|
# First node of the text box indicates floating behavior
|
15
18
|
@float_node = @nodes.first
|
19
|
+
@type_node = type_node
|
16
20
|
end
|
17
21
|
|
18
22
|
# Parse the text box's nodes to get a list of sub elements
|
@@ -22,10 +26,23 @@ module ArticleJSON
|
|
22
26
|
@nodes.map { |node| parse_sub_node(node) }.compact
|
23
27
|
end
|
24
28
|
|
29
|
+
# Extract any potential tags, specified in brackets after the Textbox definition
|
30
|
+
# @return [Array[Symbol]]
|
31
|
+
def tags
|
32
|
+
match = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/
|
33
|
+
.match(@type_node.inner_text)
|
34
|
+
return [] unless match
|
35
|
+
match[:tags].split(' ')
|
36
|
+
end
|
37
|
+
|
25
38
|
# Hash representation of this text box
|
26
39
|
# @return [ArticleJSON::Elements::TextBox]
|
27
40
|
def element
|
28
|
-
ArticleJSON::Elements::TextBox.new(
|
41
|
+
ArticleJSON::Elements::TextBox.new(
|
42
|
+
float: float,
|
43
|
+
content: content,
|
44
|
+
tags: tags
|
45
|
+
)
|
29
46
|
end
|
30
47
|
|
31
48
|
private
|
data/lib/article_json/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: article_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Sager
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2019-08-06 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -229,8 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
229
229
|
- !ruby/object:Gem::Version
|
230
230
|
version: '0'
|
231
231
|
requirements: []
|
232
|
-
|
233
|
-
rubygems_version: 2.7.7
|
232
|
+
rubygems_version: 3.0.4
|
234
233
|
signing_key:
|
235
234
|
specification_version: 4
|
236
235
|
summary: JSON Format for News Articles & Ruby Gem
|