article_json 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +108 -72
- data/bin/article_json_export_amp.rb +1 -0
- data/bin/article_json_export_apple_news.rb +15 -0
- data/bin/article_json_export_facebook.rb +1 -0
- data/bin/article_json_export_html.rb +1 -0
- data/bin/article_json_export_plain_text.rb +1 -0
- data/bin/article_json_parse_google_doc.rb +1 -0
- data/bin/check_google_doc_export.rb +41 -0
- data/bin/update_oembed_request-stubs.sh +1 -3
- data/bin/update_reference_document.sh +4 -0
- data/lib/article_json/article.rb +22 -2
- data/lib/article_json/configuration.rb +2 -1
- data/lib/article_json/elements/base.rb +0 -1
- data/lib/article_json/export/amp/elements/embed.rb +1 -1
- data/lib/article_json/export/apple_news/elements/base.rb +53 -0
- data/lib/article_json/export/apple_news/elements/embed.rb +130 -0
- data/lib/article_json/export/apple_news/elements/heading.rb +32 -0
- data/lib/article_json/export/apple_news/elements/image.rb +58 -0
- data/lib/article_json/export/apple_news/elements/list.rb +67 -0
- data/lib/article_json/export/apple_news/elements/paragraph.rb +36 -0
- data/lib/article_json/export/apple_news/elements/quote.rb +60 -0
- data/lib/article_json/export/apple_news/elements/text.rb +42 -0
- data/lib/article_json/export/apple_news/elements/text_box.rb +51 -0
- data/lib/article_json/export/apple_news/exporter.rb +37 -0
- data/lib/article_json/import/google_doc/html/image_parser.rb +16 -2
- data/lib/article_json/import/google_doc/html/node_analyzer.rb +11 -1
- data/lib/article_json/import/google_doc/html/parser.rb +6 -1
- data/lib/article_json/utils/o_embed_resolver/facebook_video.rb +17 -1
- data/lib/article_json/utils/o_embed_resolver/youtube_video.rb +1 -1
- data/lib/article_json/version.rb +1 -1
- data/lib/article_json.rb +11 -0
- metadata +33 -15
@@ -0,0 +1,130 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class Embed < Base
|
6
|
+
# Embed | Embed, Caption
|
7
|
+
# @return [Hash, Array<Hash>]
|
8
|
+
def export
|
9
|
+
caption_text.nil? ? embed : [embed, caption]
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
# Embed
|
15
|
+
# @return [Hash]
|
16
|
+
def embed
|
17
|
+
{
|
18
|
+
role: role,
|
19
|
+
URL: source_url,
|
20
|
+
caption: caption_text,
|
21
|
+
}.compact
|
22
|
+
end
|
23
|
+
|
24
|
+
# Caption
|
25
|
+
# @return [Hash]
|
26
|
+
def caption
|
27
|
+
{
|
28
|
+
role: 'caption',
|
29
|
+
text: caption_text,
|
30
|
+
format: 'html',
|
31
|
+
layout: 'captionLayout',
|
32
|
+
textStyle: 'captionStyle',
|
33
|
+
}
|
34
|
+
end
|
35
|
+
|
36
|
+
# Get the exporter class for text elements
|
37
|
+
# @return [ArticleJSON::Export::Common::HTML::Elements::Base]
|
38
|
+
def text_exporter
|
39
|
+
self.class.exporter_by_type(:text)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Caption Text
|
43
|
+
# @return [String, nil]
|
44
|
+
def caption_text
|
45
|
+
return nil if role.nil? # Do not show captions for unsupported components
|
46
|
+
|
47
|
+
text.empty? ? nil : text
|
48
|
+
end
|
49
|
+
|
50
|
+
# @return [String]
|
51
|
+
def text
|
52
|
+
@element.caption.map do |child_element|
|
53
|
+
text_exporter.new(child_element)
|
54
|
+
.export
|
55
|
+
end.join
|
56
|
+
end
|
57
|
+
|
58
|
+
def role
|
59
|
+
@role ||=
|
60
|
+
case embed_type
|
61
|
+
when :youtube_video, :vimeo_video, :dailymotion_video
|
62
|
+
:embedwebvideo
|
63
|
+
when :facebook_video
|
64
|
+
:facebook_post
|
65
|
+
when :tweet
|
66
|
+
:tweet
|
67
|
+
when :slideshare
|
68
|
+
nil
|
69
|
+
when :soundcloud
|
70
|
+
nil
|
71
|
+
else
|
72
|
+
nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def source_url
|
77
|
+
case embed_type
|
78
|
+
when :youtube_video
|
79
|
+
build_embeded_youtube_url
|
80
|
+
when :vimeo_video
|
81
|
+
build_embeded_vimeo_url
|
82
|
+
when :dailymotion_video
|
83
|
+
build_embeded_dailymotion_url # Dailymotion has its own player URL
|
84
|
+
when :facebook_video
|
85
|
+
build_facebook_video_url
|
86
|
+
when :tweet
|
87
|
+
build_twitter_url
|
88
|
+
when :slideshare
|
89
|
+
nil
|
90
|
+
when :soundcloud
|
91
|
+
nil
|
92
|
+
else
|
93
|
+
nil
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
def build_embeded_youtube_url
|
98
|
+
"https://www.youtube.com/embed/#{embed_id}"
|
99
|
+
end
|
100
|
+
|
101
|
+
def build_embeded_vimeo_url
|
102
|
+
"https://player.vimeo.com/video/#{embed_id}"
|
103
|
+
end
|
104
|
+
|
105
|
+
def build_embeded_dailymotion_url
|
106
|
+
"https://geo.dailymotion.com/player.html?video=#{embed_id}"
|
107
|
+
end
|
108
|
+
|
109
|
+
def build_facebook_video_url
|
110
|
+
username, id = embed_id.to_s.split("/", 2)
|
111
|
+
"https://www.facebook.com/#{username}/videos/#{id}"
|
112
|
+
end
|
113
|
+
|
114
|
+
def build_twitter_url
|
115
|
+
username, id = embed_id.to_s.split("/", 2)
|
116
|
+
"https://twitter.com/#{username}/status/#{id}"
|
117
|
+
end
|
118
|
+
|
119
|
+
def embed_type
|
120
|
+
@embed_type ||= @element.embed_type.to_sym
|
121
|
+
end
|
122
|
+
|
123
|
+
def embed_id
|
124
|
+
@embed_id ||= @element.embed_id.to_sym
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class Heading < Base
|
6
|
+
# Headline
|
7
|
+
# @return [Hash]
|
8
|
+
def export
|
9
|
+
{
|
10
|
+
role: role,
|
11
|
+
text: @element.content,
|
12
|
+
layout: 'titleLayout',
|
13
|
+
textStyle: 'defaultTitle',
|
14
|
+
}
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
# The role of text component for adding a heading. (Required) Always
|
20
|
+
# one of these roles for this component: heading, heading1, heading2,
|
21
|
+
# heading3, heading4, heading5, or heading6.
|
22
|
+
# @return [String]
|
23
|
+
def role
|
24
|
+
return 'heading' if @element.level.nil?
|
25
|
+
|
26
|
+
"heading#{@element.level}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class Image < Base
|
6
|
+
# Image | Image, Caption
|
7
|
+
# @return [Hash, Array<Hash>]
|
8
|
+
def export
|
9
|
+
caption_text.nil? ? image : [image, caption]
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
# Image
|
14
|
+
# @return [Hash]
|
15
|
+
def image
|
16
|
+
{
|
17
|
+
role: 'image',
|
18
|
+
URL: @element.source_url,
|
19
|
+
caption: caption_text,
|
20
|
+
}.compact
|
21
|
+
end
|
22
|
+
|
23
|
+
# Caption
|
24
|
+
# @return [Hash]
|
25
|
+
def caption
|
26
|
+
{
|
27
|
+
role: 'caption',
|
28
|
+
text: caption_text,
|
29
|
+
format: 'html',
|
30
|
+
layout: 'captionLayout',
|
31
|
+
textStyle: 'captionStyle',
|
32
|
+
}
|
33
|
+
end
|
34
|
+
|
35
|
+
# Get the exporter class for text elements
|
36
|
+
# @return [ArticleJSON::Export::Common::HTML::Elements::Base]
|
37
|
+
def text_exporter
|
38
|
+
self.class.exporter_by_type(:text)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Caption Text
|
42
|
+
# @return [String, nil]
|
43
|
+
def caption_text
|
44
|
+
text.empty? ? nil : text
|
45
|
+
end
|
46
|
+
|
47
|
+
# @return [String]
|
48
|
+
def text
|
49
|
+
@element.caption.map do |child_element|
|
50
|
+
text_exporter.new(child_element)
|
51
|
+
.export
|
52
|
+
end.join
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class List < Base
|
6
|
+
# List
|
7
|
+
# @return [Hash]
|
8
|
+
def export
|
9
|
+
{
|
10
|
+
role: 'body',
|
11
|
+
text: list_text,
|
12
|
+
format: 'html',
|
13
|
+
layout: 'bodyLayout',
|
14
|
+
textStyle: 'bodyStyle',
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Get the exporter class for text elements
|
21
|
+
#
|
22
|
+
# @return [ArticleJSON::Export::Common::HTML::Elements::<Class>]
|
23
|
+
def text_exporter
|
24
|
+
self.class.exporter_by_type(:text)
|
25
|
+
end
|
26
|
+
|
27
|
+
# When it is an unordered list wrap it in <ul></ul>
|
28
|
+
# When it is an ordered list wrap it in <ol></ol>
|
29
|
+
#
|
30
|
+
# List Text
|
31
|
+
# @return [String]
|
32
|
+
def list_text
|
33
|
+
prepend_list_tag + list + append_list_tag
|
34
|
+
end
|
35
|
+
|
36
|
+
# Each list item should be wrapped in <li></li>
|
37
|
+
#
|
38
|
+
# @return [String]
|
39
|
+
def list
|
40
|
+
@element.content.map do |paragraph_element|
|
41
|
+
line_item = paragraph_element.content.map do |text_element|
|
42
|
+
text_exporter.new(text_element).export
|
43
|
+
end.join
|
44
|
+
|
45
|
+
"<li>#{line_item}</li>"
|
46
|
+
end.join
|
47
|
+
end
|
48
|
+
|
49
|
+
# @return [String]
|
50
|
+
def prepend_list_tag
|
51
|
+
ordered_list? ? '<ol>' : '<ul>'
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [String]
|
55
|
+
def append_list_tag
|
56
|
+
ordered_list? ? '</ol>' : '</ul>'
|
57
|
+
end
|
58
|
+
|
59
|
+
# @return [Boolean]
|
60
|
+
def ordered_list?
|
61
|
+
@element.list_type == :ordered
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class Paragraph < Base
|
6
|
+
# Generate the paragraph node with its containing text elements
|
7
|
+
# @return [Hash]
|
8
|
+
def export
|
9
|
+
{
|
10
|
+
role: 'body',
|
11
|
+
text: text,
|
12
|
+
format: 'html',
|
13
|
+
layout: 'bodyLayout',
|
14
|
+
textStyle: 'bodyStyle',
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# Get the exporter class for text elements
|
21
|
+
# @return [ArticleJSON::Export::Common::HTML::Elements::Base]
|
22
|
+
def text_exporter
|
23
|
+
self.class.exporter_by_type(:text)
|
24
|
+
end
|
25
|
+
|
26
|
+
def text
|
27
|
+
@element.content.map do |child_element|
|
28
|
+
text_exporter.new(child_element)
|
29
|
+
.export
|
30
|
+
end.join
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class Quote < Base
|
6
|
+
include ArticleJSON::Export::Common::HTML::Elements::Base
|
7
|
+
include ArticleJSON::Export::Common::HTML::Elements::Text
|
8
|
+
|
9
|
+
def export
|
10
|
+
[quote, author]
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
# Quote
|
16
|
+
# @return [Hash]
|
17
|
+
def quote
|
18
|
+
{
|
19
|
+
role: 'pullquote',
|
20
|
+
text: quote_text,
|
21
|
+
format: 'html',
|
22
|
+
layout: 'pullquoteLayout',
|
23
|
+
textStyle: 'pullquoteStyle',
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
# Author
|
28
|
+
# @return [Hash]
|
29
|
+
def author
|
30
|
+
{
|
31
|
+
role: 'author',
|
32
|
+
text: author_text,
|
33
|
+
format: 'html',
|
34
|
+
layout: 'pullquoteAttributeLayout',
|
35
|
+
textStyle: 'quoteAttributeStyle',
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
def text_exporter
|
40
|
+
self.class.exporter_by_type(:text)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Quote Text
|
44
|
+
# @return [String]
|
45
|
+
def quote_text
|
46
|
+
element = @element.content.first&.content&.first # nil when the quote has no content
|
47
|
+
text_exporter.new(element).export
|
48
|
+
end
|
49
|
+
|
50
|
+
# Author Text
|
51
|
+
# @return [String]
|
52
|
+
def author_text
|
53
|
+
element = @element.caption.first
|
54
|
+
text_exporter.new(element).export
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class Text < Base
|
6
|
+
include ArticleJSON::Export::Common::HTML::Elements::Base
|
7
|
+
include ArticleJSON::Export::Common::HTML::Elements::Text
|
8
|
+
|
9
|
+
UNSUPPORTED_HTML_TAGS = %w[title meta script noscript style link applet object iframe
|
10
|
+
noframes form select option optgroup
|
11
|
+
].freeze
|
12
|
+
|
13
|
+
# A Nokogiri object is returned with `super`, which is then
|
14
|
+
# returned as either a string or as HTML (when not plain text),
|
15
|
+
# both of which are compatible with Apple News format. Takes into
|
16
|
+
# account bold, italic and href.
|
17
|
+
# @return [String]
|
18
|
+
def export
|
19
|
+
super.to_s
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param [String] text
|
23
|
+
def create_text_nodes(text)
|
24
|
+
Nokogiri::HTML.fragment(sanitize_text(text).gsub(/\n/, '<br>')).children
|
25
|
+
end
|
26
|
+
|
27
|
+
# Removes UNSUPPORTED_HTML_TAGS from text
|
28
|
+
#
|
29
|
+
# @param [String] text
|
30
|
+
# @return [String]
|
31
|
+
def sanitize_text(text)
|
32
|
+
doc = Nokogiri::HTML.fragment(text)
|
33
|
+
UNSUPPORTED_HTML_TAGS.each do |tag|
|
34
|
+
doc.search(tag).each(&:remove)
|
35
|
+
end
|
36
|
+
doc.inner_html
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
module Elements
|
5
|
+
class TextBox < Base
|
6
|
+
include ArticleJSON::Export::Common::HTML::Elements::TextBox
|
7
|
+
# Text box container
|
8
|
+
# @return [Hash]
|
9
|
+
def export
|
10
|
+
{
|
11
|
+
role: 'container',
|
12
|
+
layout: 'textBoxLayout',
|
13
|
+
style: 'textBoxStyle',
|
14
|
+
components: map_styles(elements),
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# @return [Array]
|
21
|
+
def elements
|
22
|
+
@element.content.map do |child_element|
|
23
|
+
case child_element
|
24
|
+
when ArticleJSON::Elements::Heading
|
25
|
+
namespace::Heading.new(child_element).export
|
26
|
+
when ArticleJSON::Elements::Paragraph
|
27
|
+
namespace::Paragraph.new(child_element).export
|
28
|
+
when ArticleJSON::Elements::List
|
29
|
+
namespace::List.new(child_element).export
|
30
|
+
else
|
31
|
+
namespace::Text.new(child_element).export
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# @return [Module]
|
37
|
+
def namespace
|
38
|
+
ArticleJSON::Export::AppleNews::Elements
|
39
|
+
end
|
40
|
+
|
41
|
+
# @return [Array]
|
42
|
+
def map_styles(elements)
|
43
|
+
elements.map do |child_element|
|
44
|
+
child_element.merge(layout: 'textBox' + child_element[:layout].sub(/\S/, &:upcase))
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Export
|
3
|
+
module AppleNews
|
4
|
+
class Exporter
|
5
|
+
# @param [Array[ArticleJSON::Elements::Base]] elements
|
6
|
+
def initialize(elements)
|
7
|
+
@elements = elements
|
8
|
+
end
|
9
|
+
|
10
|
+
# Return the components section of an Apple News Article as JSON
|
11
|
+
#
|
12
|
+
# Images and embedded videos are nested in an array within the components
|
13
|
+
# array when they contain captions. As Apple News skips over these
|
14
|
+
# nested arrays, we must flatten the array.
|
15
|
+
#
|
16
|
+
# @return [String]
|
17
|
+
def to_json
|
18
|
+
{ components: components.flatten }.to_json
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
# Generate an array with the Apple News components of all elements
|
24
|
+
#
|
25
|
+
# @return [Array]
|
26
|
+
def components
|
27
|
+
@components ||=
|
28
|
+
@elements.map do |element|
|
29
|
+
ArticleJSON::Export::AppleNews::Elements::Base
|
30
|
+
.build(element)
|
31
|
+
&.export
|
32
|
+
end.reject { |hash| hash.nil? || hash.empty? }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -22,25 +22,31 @@ module ArticleJSON
|
|
22
22
|
# The value of the image's `alt` attribute
|
23
23
|
# @return [String]
|
24
24
|
def alt
|
25
|
+
return '' if image_url?
|
26
|
+
|
25
27
|
image_node.attribute('alt')&.value || ''
|
26
28
|
end
|
27
29
|
|
28
30
|
# The value of the image's `src` attribute
|
29
31
|
# @return [String]
|
30
32
|
def source_url
|
33
|
+
return @node.inner_text.strip if image_url?
|
34
|
+
|
31
35
|
image_node.attribute('src').value
|
32
36
|
end
|
33
37
|
|
34
38
|
# The node of the actual image
|
35
39
|
# @return [Nokogiri::HTML::Node]
|
36
40
|
def image_node
|
37
|
-
@
|
41
|
+
return @image_node if defined? @image_node
|
42
|
+
|
43
|
+
@image_node = @node.xpath('.//img').first
|
38
44
|
end
|
39
45
|
|
40
46
|
# Check if the image is floating (left, right or not at all)
|
41
47
|
# @return [Symbol]
|
42
48
|
def float
|
43
|
-
super if floatable_size?
|
49
|
+
super if image_url? || floatable_size?
|
44
50
|
end
|
45
51
|
|
46
52
|
# Extracts an href from the tag [image-link-to: url]) if present
|
@@ -80,6 +86,7 @@ module ArticleJSON
|
|
80
86
|
def href_regexp
|
81
87
|
%r{\[image-link-to:\s+(?<url>.*?)\]}
|
82
88
|
end
|
89
|
+
|
83
90
|
# Check if the image's width can be determined and is less than 500px
|
84
91
|
# This is about 3/4 of the google document width...
|
85
92
|
# @return [Boolean]
|
@@ -101,6 +108,13 @@ module ArticleJSON
|
|
101
108
|
match['px'].to_i if match && match['px']
|
102
109
|
end
|
103
110
|
end
|
111
|
+
|
112
|
+
# When the current node doesn't contain an actual image tag,
|
113
|
+
# we're dealing with an image URL
|
114
|
+
# @return [Boolean]
|
115
|
+
def image_url?
|
116
|
+
image_node.nil?
|
117
|
+
end
|
104
118
|
end
|
105
119
|
end
|
106
120
|
end
|
@@ -88,7 +88,17 @@ module ArticleJSON
|
|
88
88
|
# @return [Boolean]
|
89
89
|
def image?
|
90
90
|
return @is_image if defined? @is_image
|
91
|
-
@is_image = node.xpath('.//img').length > 0
|
91
|
+
@is_image = image_url? || node.xpath('.//img').length > 0
|
92
|
+
end
|
93
|
+
|
94
|
+
# Check if the node contains an image URL
|
95
|
+
# @return [Boolean]
|
96
|
+
def image_url?
|
97
|
+
return @is_image_url if defined? @is_image_url
|
98
|
+
|
99
|
+
text = node.inner_text.strip
|
100
|
+
url_regexp = %r{https?:\/\/\S+\.(?:jpg|jpeg|png|gif)}i
|
101
|
+
@is_image_url = !!(url_regexp =~ text)
|
92
102
|
end
|
93
103
|
|
94
104
|
# Check if the node contains an embedded element
|
@@ -6,7 +6,12 @@ module ArticleJSON
|
|
6
6
|
# @param [String] html
|
7
7
|
def initialize(html)
|
8
8
|
doc = Nokogiri::HTML(html)
|
9
|
-
|
9
|
+
selection = if doc.xpath('//body/div').empty?
|
10
|
+
doc.xpath('//body')
|
11
|
+
else
|
12
|
+
doc.xpath('//body/div')
|
13
|
+
end
|
14
|
+
@body_enumerator = selection.last.children.to_enum
|
10
15
|
|
11
16
|
css_node = doc.xpath('//head/style').last
|
12
17
|
@css_analyzer = CSSAnalyzer.new(css_node&.inner_text)
|
@@ -11,7 +11,8 @@ module ArticleJSON
|
|
11
11
|
# The URL for the oembed API call
|
12
12
|
# @return [String]
|
13
13
|
def oembed_url
|
14
|
-
"https://
|
14
|
+
"https://graph.facebook.com/v9.0/oembed_video?url=#{source_url}" \
|
15
|
+
"&access_token=#{access_token}"
|
15
16
|
end
|
16
17
|
|
17
18
|
# The video URL of the element
|
@@ -19,6 +20,21 @@ module ArticleJSON
|
|
19
20
|
def source_url
|
20
21
|
"https://www.facebook.com/facebook/videos/#{@element.embed_id}"
|
21
22
|
end
|
23
|
+
|
24
|
+
# The facebook access token. If not set, it raises an exception
|
25
|
+
# explaining how to configure it.
|
26
|
+
#
|
27
|
+
# @return [String]
|
28
|
+
def access_token
|
29
|
+
token = ArticleJSON.configuration.facebook_token
|
30
|
+
|
31
|
+
if token.nil?
|
32
|
+
raise 'You need to configure the facebook token to use facebook ' \
|
33
|
+
'embed videos, see: ' \
|
34
|
+
'https://github.com/Devex/article_json#facebook-oembed'
|
35
|
+
end
|
36
|
+
token
|
37
|
+
end
|
22
38
|
end
|
23
39
|
end
|
24
40
|
end
|
@@ -11,7 +11,7 @@ module ArticleJSON
|
|
11
11
|
# The URL for the oembed API call
|
12
12
|
# @return [String]
|
13
13
|
def oembed_url
|
14
|
-
"
|
14
|
+
"https://www.youtube.com/oembed?format=json&url=#{source_url}"
|
15
15
|
end
|
16
16
|
|
17
17
|
# The video URL of the element
|
data/lib/article_json/version.rb
CHANGED
data/lib/article_json.rb
CHANGED
@@ -77,6 +77,17 @@ require_relative 'article_json/export/html/elements/quote'
|
|
77
77
|
require_relative 'article_json/export/html/elements/embed'
|
78
78
|
require_relative 'article_json/export/html/exporter'
|
79
79
|
|
80
|
+
require_relative 'article_json/export/apple_news/elements/base'
|
81
|
+
require_relative 'article_json/export/apple_news/elements/text'
|
82
|
+
require_relative 'article_json/export/apple_news/elements/heading'
|
83
|
+
require_relative 'article_json/export/apple_news/elements/paragraph'
|
84
|
+
require_relative 'article_json/export/apple_news/elements/list'
|
85
|
+
require_relative 'article_json/export/apple_news/elements/image'
|
86
|
+
require_relative 'article_json/export/apple_news/elements/embed'
|
87
|
+
require_relative 'article_json/export/apple_news/elements/quote'
|
88
|
+
require_relative 'article_json/export/apple_news/elements/text_box'
|
89
|
+
require_relative 'article_json/export/apple_news/exporter'
|
90
|
+
|
80
91
|
require_relative 'article_json/export/amp/elements/base'
|
81
92
|
require_relative 'article_json/export/amp/elements/text'
|
82
93
|
require_relative 'article_json/export/amp/elements/paragraph'
|