article_json 0.3.2 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +24 -1
- data/README.md +15 -0
- data/bin/article_json_export_amp.rb +14 -0
- data/bin/article_json_export_facebook.rb +15 -0
- data/bin/article_json_export_plain_text.rb +14 -0
- data/bin/update_oembed_request-stubs.sh +13 -0
- data/bin/update_reference_document.sh +12 -0
- data/lib/article_json.rb +1 -0
- data/lib/article_json/elements/image.rb +11 -3
- data/lib/article_json/elements/text_box.rb +7 -3
- data/lib/article_json/export/amp/custom_element_library_resolver.rb +4 -2
- data/lib/article_json/export/amp/elements/embed.rb +17 -2
- data/lib/article_json/export/common/html/elements/image.rb +19 -5
- data/lib/article_json/import/google_doc/html/embedded_parser.rb +1 -0
- data/lib/article_json/import/google_doc/html/embedded_soundcloud_parser.rb +28 -0
- data/lib/article_json/import/google_doc/html/image_parser.rb +34 -1
- data/lib/article_json/import/google_doc/html/node_analyzer.rb +9 -1
- data/lib/article_json/import/google_doc/html/parser.rb +5 -1
- data/lib/article_json/import/google_doc/html/text_box_parser.rb +19 -2
- data/lib/article_json/utils.rb +1 -0
- data/lib/article_json/utils/o_embed_resolver/base.rb +1 -0
- data/lib/article_json/utils/o_embed_resolver/soundcloud.rb +25 -0
- data/lib/article_json/version.rb +1 -1
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: baf39dbf5fb3f225088927c3ea9f7d58a59a502c6c70da1546829d857760ee6a
|
4
|
+
data.tar.gz: f9c60e3a2e8aadffda6525eb9acd085ee5ffcc53697cc7fec4a3c204209a571e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28783003b3b27d4eca2e5141dd59967f0f2ee44f077726bd5935a8e9dfa49fbd482cac67d49f2bc69e66797cb6dff7c67093124887215423117df6bbed8c5840
|
7
|
+
data.tar.gz: 0ee51b7975ed3621a99abe34a4d2180c327e3a1a678539aa58ed40171d8bf0cf812c0b1478c49886418fb410eb8634ac3b20c8a00ed881da2edeb5f5e7bc469f
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,27 @@
|
|
1
1
|
# Changelog
|
2
|
+
## 0.3.8 - 2020/7/31
|
3
|
+
- **Improvements:**
|
4
|
+
- Add a script to update oembed stubs fixtures.
|
5
|
+
- Support for `alt` attribute in images.
|
6
|
+
|
7
|
+
- **Fix:** Fix a bug when using the `[image-link-to: ]` tag.
|
8
|
+
|
9
|
+
## 0.3.7 - 2019/8/21
|
10
|
+
- **Fix:** Only use https for soundcloud oembed api
|
11
|
+
|
12
|
+
## 0.3.6 - 2019/8/6
|
13
|
+
- **Improvement:** Add tags support in the `text_box` element.
|
14
|
+
|
15
|
+
## 0.3.5 - 2018/12/12
|
16
|
+
- **Improvements** to import and export image links from Google Docs
|
17
|
+
- Import image `href` from caption text using a custom tag
|
18
|
+
- Export the image element href attribute as a link
|
19
|
+
|
20
|
+
## 0.3.4 - 2018/5/10
|
21
|
+
- **Fix:** Only include slug from the soundcloud URL in google doc parser
|
22
|
+
|
23
|
+
## 0.3.3 - 2018/4/12
|
24
|
+
- Support embedding SoundCloud
|
2
25
|
|
3
26
|
## 0.3.2 - 2017/12/06
|
4
27
|
- Another **fix** to prevent `nil` elements when placing additional elements on articles that end with empty paragraphs
|
@@ -43,7 +66,7 @@ One potentially **breaking change** was added:
|
|
43
66
|
|
44
67
|
## 0.1.0 - 2017/09/20
|
45
68
|
This is the very first release, with the following functionality:
|
46
|
-
- article-json format that supports several basic elements; like headings,
|
69
|
+
- article-json format that supports several basic elements; like headings,
|
47
70
|
paragraphs, images or lists
|
48
71
|
- Resolving of embedded elements like videos or tweets via OEmbed standard
|
49
72
|
- Conversion from and to JSON (or ruby hashes)
|
data/README.md
CHANGED
@@ -53,6 +53,21 @@ $ ./bin/article_json_export_google_doc.rb $DOC_ID \
|
|
53
53
|
| ./bin/article_json_export_html.rb
|
54
54
|
```
|
55
55
|
|
56
|
+
You can also update all the different exported versions of the reference
|
57
|
+
document (HTML, JSON, AMP, Facebook Instant Article and plain text) by
|
58
|
+
running the following command:
|
59
|
+
|
60
|
+
```
|
61
|
+
$ ./bin/update_reference_document.sh
|
62
|
+
```
|
63
|
+
|
64
|
+
When running the tests, we use some fixtures to mock the responses for oembed
|
65
|
+
requests, but these may change over time. To update them, run:
|
66
|
+
|
67
|
+
```
|
68
|
+
$ ./bin/update_oembed_request-stubs.sh
|
69
|
+
```
|
70
|
+
|
56
71
|
### Configuration
|
57
72
|
There are some configuration options that allow a more tailored usage of the
|
58
73
|
`article_json` gem. The following code snippet gives an example for every
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
4
|
+
#
|
5
|
+
# Simple script to read a JSON document and export it to AMP.
|
6
|
+
#
|
7
|
+
# Usage:
|
8
|
+
#
|
9
|
+
# ./bin/article_json_export_amp.rb < my_document.json
|
10
|
+
#
|
11
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
12
|
+
|
13
|
+
require_relative '../lib/article_json'
|
14
|
+
puts ArticleJSON::Article.from_json(ARGF.read).to_amp
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
4
|
+
#
|
5
|
+
# Simple script to read a JSON document and export it to Facebook Instant
|
6
|
+
# Article.
|
7
|
+
#
|
8
|
+
# Usage:
|
9
|
+
#
|
10
|
+
# ./bin/article_json_export_facebook.rb < my_document.json
|
11
|
+
#
|
12
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
13
|
+
|
14
|
+
require_relative '../lib/article_json'
|
15
|
+
puts ArticleJSON::Article.from_json(ARGF.read).to_facebook_instant_article
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
4
|
+
#
|
5
|
+
# Simple script to read a JSON document and export it to plain text.
|
6
|
+
#
|
7
|
+
# Usage:
|
8
|
+
#
|
9
|
+
# ./bin/article_json_export_plain_text.rb < my_document.json
|
10
|
+
#
|
11
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
12
|
+
|
13
|
+
require_relative '../lib/article_json'
|
14
|
+
puts ArticleJSON::Article.from_json(ARGF.read).to_plain_text
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env sh
|
2
|
+
|
3
|
+
curl -X GET "https://vimeo.com/api/oembed.json?url=https://vimeo.com/42315417" | jq > spec/fixtures/vimeo_video_oembed.json
|
4
|
+
|
5
|
+
curl -X GET "http://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=_ZG8HBuDjgc" | jq > spec/fixtures/youtube_video_oembed.json
|
6
|
+
|
7
|
+
curl -X GET "https://www.slideshare.net/api/oembed/2?format=json&url=https://www.slideshare.net/Devex/the-best-global-development-quotes-of-2012" | jq > spec/fixtures/slideshare_oembed.json
|
8
|
+
|
9
|
+
curl -X GET "https://api.twitter.com/1/statuses/oembed.json?align=center&url=https://twitter.com/d3v3x/status/554608639030599681" | jq > spec/fixtures/tweet_oembed.json
|
10
|
+
|
11
|
+
curl -X GET 'https://soundcloud.com/oembed?format=json&url=https://soundcloud.com/rich-the-kid/plug-walk-1' | jq > spec/fixtures/soundcloud_oembed.json
|
12
|
+
|
13
|
+
curl -X GET 'https://www.facebook.com/plugins/video/oembed.json?url=https://www.facebook.com/facebook/videos/1814600831891266' | jq > spec/fixtures/facebook_video_oembed.json
|
@@ -7,6 +7,9 @@ DOC_ID="1E4lncZE2jDkbE34eDyYQmXKA9O26BHUiwguz4S9qyE8"
|
|
7
7
|
SOURCE_HTML_FILE="spec/fixtures/reference_document.html"
|
8
8
|
JSON_FILE="spec/fixtures/reference_document_parsed.json"
|
9
9
|
HTML_EXPORT_FILE="spec/fixtures/reference_document_exported.html"
|
10
|
+
AMP_EXPORT_FILE="spec/fixtures/reference_document_exported.amp.html"
|
11
|
+
FACEBOOK_EXPORT_FILE="spec/fixtures/reference_document_exported.facebook.html"
|
12
|
+
PLAIN_TEXT_EXPORT_FILE="spec/fixtures/reference_document_exported.txt"
|
10
13
|
|
11
14
|
# export the google doc to HTML
|
12
15
|
./bin/article_json_export_google_doc.rb ${DOC_ID} > ${SOURCE_HTML_FILE}
|
@@ -16,3 +19,12 @@ HTML_EXPORT_FILE="spec/fixtures/reference_document_exported.html"
|
|
16
19
|
|
17
20
|
# convert the JSON export to HTML
|
18
21
|
./bin/article_json_export_html.rb < ${JSON_FILE} > ${HTML_EXPORT_FILE}
|
22
|
+
|
23
|
+
# convert the JSON export to AMP
|
24
|
+
./bin/article_json_export_amp.rb < ${JSON_FILE} > ${AMP_EXPORT_FILE}
|
25
|
+
|
26
|
+
# convert the JSON export to Facebook Instant Article
|
27
|
+
./bin/article_json_export_facebook.rb < ${JSON_FILE} > ${FACEBOOK_EXPORT_FILE}
|
28
|
+
|
29
|
+
# convert the JSON export to plain text
|
30
|
+
./bin/article_json_export_plain_text.rb < ${JSON_FILE} > ${PLAIN_TEXT_EXPORT_FILE}
|
data/lib/article_json.rb
CHANGED
@@ -37,6 +37,7 @@ require_relative 'article_json/import/google_doc/html/embedded_vimeo_video_parse
|
|
37
37
|
require_relative 'article_json/import/google_doc/html/embedded_youtube_video_parser'
|
38
38
|
require_relative 'article_json/import/google_doc/html/embedded_slideshare_parser'
|
39
39
|
require_relative 'article_json/import/google_doc/html/embedded_tweet_parser'
|
40
|
+
require_relative 'article_json/import/google_doc/html/embedded_soundcloud_parser'
|
40
41
|
require_relative 'article_json/import/google_doc/html/parser'
|
41
42
|
|
42
43
|
require_relative 'article_json/export/common/elements/base'
|
@@ -1,16 +1,20 @@
|
|
1
1
|
module ArticleJSON
|
2
2
|
module Elements
|
3
3
|
class Image < Base
|
4
|
-
attr_reader :source_url, :caption, :float
|
4
|
+
attr_reader :source_url, :caption, :float, :href, :alt
|
5
5
|
|
6
6
|
# @param [String] source_url
|
7
7
|
# @param [Array[ArticleJSON::Elements::Text]] caption
|
8
8
|
# @param [Symbol] float
|
9
|
-
|
9
|
+
# @param [String] href
|
10
|
+
# @param [String] alt
|
11
|
+
def initialize(source_url:, caption:, float: nil, href: nil, alt: nil)
|
10
12
|
@type = :image
|
11
13
|
@source_url = source_url
|
12
14
|
@caption = caption
|
13
15
|
@float = float
|
16
|
+
@href = href
|
17
|
+
@alt = alt
|
14
18
|
end
|
15
19
|
|
16
20
|
# Hash representation of this image element
|
@@ -21,6 +25,8 @@ module ArticleJSON
|
|
21
25
|
source_url: source_url,
|
22
26
|
float: float,
|
23
27
|
caption: caption.map(&:to_h),
|
28
|
+
href: href,
|
29
|
+
alt: alt,
|
24
30
|
}
|
25
31
|
end
|
26
32
|
|
@@ -31,7 +37,9 @@ module ArticleJSON
|
|
31
37
|
new(
|
32
38
|
source_url: hash[:source_url],
|
33
39
|
caption: parse_hash_list(hash[:caption]),
|
34
|
-
float: hash[:float]&.to_sym
|
40
|
+
float: hash[:float]&.to_sym,
|
41
|
+
href: hash[:href],
|
42
|
+
alt: hash[:alt]
|
35
43
|
)
|
36
44
|
end
|
37
45
|
end
|
@@ -1,14 +1,16 @@
|
|
1
1
|
module ArticleJSON
|
2
2
|
module Elements
|
3
3
|
class TextBox < Base
|
4
|
-
attr_reader :content, :float
|
4
|
+
attr_reader :content, :float, :tags
|
5
5
|
|
6
6
|
# @param [Array[Paragraph|Heading|List]] content
|
7
7
|
# @param [Symbol] float
|
8
|
-
|
8
|
+
# @param [Array] tags
|
9
|
+
def initialize(content:, float: nil, tags: [])
|
9
10
|
@type = :text_box
|
10
11
|
@content = content
|
11
12
|
@float = float
|
13
|
+
@tags = tags
|
12
14
|
end
|
13
15
|
|
14
16
|
# Hash representation of this text box element
|
@@ -18,6 +20,7 @@ module ArticleJSON
|
|
18
20
|
type: type,
|
19
21
|
float: float,
|
20
22
|
content: content.map(&:to_h),
|
23
|
+
tags: tags,
|
21
24
|
}
|
22
25
|
end
|
23
26
|
|
@@ -27,7 +30,8 @@ module ArticleJSON
|
|
27
30
|
def parse_hash(hash)
|
28
31
|
new(
|
29
32
|
content: parse_hash_list(hash[:content]),
|
30
|
-
float: hash[:float]&.to_sym
|
33
|
+
float: hash[:float]&.to_sym,
|
34
|
+
tags: hash[:tags]
|
31
35
|
)
|
32
36
|
end
|
33
37
|
end
|
@@ -26,8 +26,8 @@ module ArticleJSON
|
|
26
26
|
def script_tags
|
27
27
|
sources.map do |custom_element_tag, src|
|
28
28
|
<<-HTML.gsub(/\s+/, ' ').strip
|
29
|
-
<script async
|
30
|
-
custom-element="#{custom_element_tag}"
|
29
|
+
<script async
|
30
|
+
custom-element="#{custom_element_tag}"
|
31
31
|
src="#{src}"></script>
|
32
32
|
HTML
|
33
33
|
end
|
@@ -46,6 +46,8 @@ module ArticleJSON
|
|
46
46
|
'amp-vimeo': 'https://cdn.ampproject.org/v0/amp-vimeo-0.1.js',
|
47
47
|
'amp-facebook':
|
48
48
|
'https://cdn.ampproject.org/v0/amp-facebook-0.1.js',
|
49
|
+
'amp-soundcloud':
|
50
|
+
'https://cdn.ampproject.org/v0/amp-soundcloud-0.1.js',
|
49
51
|
}[custom_element_tag]
|
50
52
|
end
|
51
53
|
end
|
@@ -14,6 +14,7 @@ module ArticleJSON
|
|
14
14
|
when :facebook_video then %i(amp-facebook)
|
15
15
|
when :tweet then %i(amp-twitter)
|
16
16
|
when :slideshare then %i(amp-iframe)
|
17
|
+
when :soundcloud then %i(amp-soundcloud)
|
17
18
|
else []
|
18
19
|
end
|
19
20
|
end
|
@@ -33,7 +34,9 @@ module ArticleJSON
|
|
33
34
|
when :tweet
|
34
35
|
tweet_node
|
35
36
|
when :slideshare
|
36
|
-
|
37
|
+
iframe_node
|
38
|
+
when :soundcloud
|
39
|
+
soundcloud_node
|
37
40
|
end
|
38
41
|
end
|
39
42
|
|
@@ -74,8 +77,20 @@ module ArticleJSON
|
|
74
77
|
height: default_height)
|
75
78
|
end
|
76
79
|
|
80
|
+
def soundcloud_node
|
81
|
+
src = Nokogiri::HTML(@element.oembed_data[:html])
|
82
|
+
.xpath('//iframe/@src').first.value
|
83
|
+
track_id = src.match(/tracks%2F(\d+)/)[1]
|
84
|
+
create_element('amp-soundcloud',
|
85
|
+
layout: 'fixed-height',
|
86
|
+
'data-trackid': track_id,
|
87
|
+
'data-visual': true,
|
88
|
+
width: 'auto',
|
89
|
+
height: default_height)
|
90
|
+
end
|
91
|
+
|
77
92
|
# @return [Nokogiri::XML::Element]
|
78
|
-
def
|
93
|
+
def iframe_node
|
79
94
|
node = Nokogiri::HTML(@element.oembed_data[:html]).xpath('//iframe')
|
80
95
|
create_element('amp-iframe',
|
81
96
|
src: node.attribute('src').value,
|
@@ -7,22 +7,36 @@ module ArticleJSON
|
|
7
7
|
include ArticleJSON::Export::Common::HTML::Elements::Shared::Caption
|
8
8
|
include ArticleJSON::Export::Common::HTML::Elements::Shared::Float
|
9
9
|
|
10
|
-
# Generate the `<figure>` node containing the image and caption
|
10
|
+
# Generate the `<figure>` node containing the image and caption or
|
11
|
+
# an `<a>` node containing the `<figure>` node if href is present.
|
11
12
|
# @return [Nokogiri::XML::Element]
|
12
13
|
def export
|
14
|
+
figure_node
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
# @return [Nokogiri::XML::NodeSet]
|
20
|
+
def figure_node
|
13
21
|
create_element(:figure, node_opts) do |figure|
|
14
|
-
|
22
|
+
node = @element&.href ? href_node : image_node
|
23
|
+
figure.add_child(node)
|
15
24
|
if @element.caption&.any?
|
16
25
|
figure.add_child(caption_node(:figcaption))
|
17
26
|
end
|
18
27
|
end
|
19
28
|
end
|
20
29
|
|
21
|
-
private
|
22
|
-
|
23
30
|
# @return [Nokogiri::XML::NodeSet]
|
24
31
|
def image_node
|
25
|
-
create_element(:img, src: @element.source_url)
|
32
|
+
create_element(:img, src: @element.source_url, alt: @element.alt)
|
33
|
+
end
|
34
|
+
|
35
|
+
# @return [Nokogiri::XML::NodeSet]
|
36
|
+
def href_node
|
37
|
+
create_element(:a, href: @element.href) do |a|
|
38
|
+
a.add_child(image_node)
|
39
|
+
end
|
26
40
|
end
|
27
41
|
|
28
42
|
# @return [Hash]
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Import
|
3
|
+
module GoogleDoc
|
4
|
+
module HTML
|
5
|
+
class EmbeddedSoundcloudParser < EmbeddedParser
|
6
|
+
# The type of this embedded element
|
7
|
+
# @return [Symbol]
|
8
|
+
def embed_type
|
9
|
+
:soundcloud
|
10
|
+
end
|
11
|
+
|
12
|
+
class << self
|
13
|
+
# Regular expression to check if a given string is a Soundcloud URL
|
14
|
+
# Also used to extract the ID from the URL.
|
15
|
+
# @return [Regexp]
|
16
|
+
def url_regexp
|
17
|
+
%r{
|
18
|
+
^\S* # all protocols & sub domains
|
19
|
+
soundcloud\.com/ # domain
|
20
|
+
(?<id>[-/0-9a-z]+) # the slug of the user / track
|
21
|
+
}xi
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -12,12 +12,19 @@ module ArticleJSON
|
|
12
12
|
def initialize(node:, caption_node:, css_analyzer:)
|
13
13
|
@node = node
|
14
14
|
@caption_node = caption_node
|
15
|
+
@href = href
|
15
16
|
@css_analyzer = css_analyzer
|
16
17
|
|
17
18
|
# Main node indicates the floating behavior
|
18
19
|
@float_node = @node
|
19
20
|
end
|
20
21
|
|
22
|
+
# The value of the image's `alt` attribute
|
23
|
+
# @return [String]
|
24
|
+
def alt
|
25
|
+
image_node.attribute('alt')&.value || ''
|
26
|
+
end
|
27
|
+
|
21
28
|
# The value of the image's `src` attribute
|
22
29
|
# @return [String]
|
23
30
|
def source_url
|
@@ -36,17 +43,43 @@ module ArticleJSON
|
|
36
43
|
super if floatable_size?
|
37
44
|
end
|
38
45
|
|
46
|
+
# Extracts an href from the tag [image-link-to: url] if present
|
47
|
+
# in the caption node.
|
48
|
+
# @return [String]
|
49
|
+
def href
|
50
|
+
return if @caption_node.nil?
|
51
|
+
match = @caption_node.content.strip.match(href_regexp)
|
52
|
+
return if match.nil?
|
53
|
+
remove_image_link_tag
|
54
|
+
match[:url]
|
55
|
+
end
|
56
|
+
|
39
57
|
# @return [ArticleJSON::Elements::Image]
|
40
58
|
def element
|
41
59
|
ArticleJSON::Elements::Image.new(
|
42
60
|
source_url: source_url,
|
43
61
|
float: float,
|
44
|
-
caption: caption
|
62
|
+
caption: caption,
|
63
|
+
href: @href,
|
64
|
+
alt: alt
|
45
65
|
)
|
46
66
|
end
|
47
67
|
|
48
68
|
private
|
49
69
|
|
70
|
+
# Removes the [image-link-to: url] tag from the caption node
|
71
|
+
def remove_image_link_tag
|
72
|
+
@caption_node
|
73
|
+
.children
|
74
|
+
.first
|
75
|
+
.content = @caption_node.content.sub(href_regexp, '').strip
|
76
|
+
end
|
77
|
+
|
78
|
+
# Regular expression to check if there's a [image-link-to: url] tag
|
79
|
+
# @return [Regexp]
|
80
|
+
def href_regexp
|
81
|
+
%r{\[image-link-to:\s+(?<url>.*?)\]}
|
82
|
+
end
|
50
83
|
# Check if the image's width can be determined and is less than 500px
|
51
84
|
# This is about 3/4 of the google document width...
|
52
85
|
# @return [Boolean]
|
@@ -17,6 +17,14 @@ module ArticleJSON
|
|
17
17
|
node.inner_text.strip.downcase == text.strip.downcase
|
18
18
|
end
|
19
19
|
|
20
|
+
# Check if the node text begins with a certain text
|
21
|
+
# @param [String] text
|
22
|
+
# @return [Boolean]
|
23
|
+
def begins_with?(text)
|
24
|
+
first_word = node.inner_text.strip.downcase.split(' ').first
|
25
|
+
first_word == text.strip.downcase
|
26
|
+
end
|
27
|
+
|
20
28
|
# Check if the node is empty, i.e. not containing any text
|
21
29
|
# Given that images are the only nodes without text, we have to make
|
22
30
|
# sure that it's not an image.
|
@@ -65,7 +73,7 @@ module ArticleJSON
|
|
65
73
|
# @return [Boolean]
|
66
74
|
def text_box?
|
67
75
|
return @is_text_box if defined? @is_text_box
|
68
|
-
@is_text_box =
|
76
|
+
@is_text_box = begins_with?('textbox:') || begins_with?('highlight:')
|
69
77
|
end
|
70
78
|
|
71
79
|
# Check if the node starts a quote
|
@@ -83,7 +83,11 @@ module ArticleJSON
|
|
83
83
|
# @return [ArticleJSON::Elements::TextBox]
|
84
84
|
def parse_text_box
|
85
85
|
TextBoxParser
|
86
|
-
.new(
|
86
|
+
.new(
|
87
|
+
type_node: @current_node.node,
|
88
|
+
nodes: nodes_until_hr,
|
89
|
+
css_analyzer: @css_analyzer
|
90
|
+
)
|
87
91
|
.element
|
88
92
|
end
|
89
93
|
|
@@ -5,14 +5,18 @@ module ArticleJSON
|
|
5
5
|
class TextBoxParser
|
6
6
|
include Shared::Float
|
7
7
|
|
8
|
+
# @param [Nokogiri::HTML::Node] type_node Document node that states
|
9
|
+
# that this is a textbox.
|
10
|
+
# May contain tags, too.
|
8
11
|
# @param [Array[Nokogiri::HTML::Node]] nodes
|
9
12
|
# @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
|
10
|
-
def initialize(nodes:, css_analyzer:)
|
13
|
+
def initialize(type_node: ,nodes:, css_analyzer:)
|
11
14
|
@nodes = nodes.reject { |node| NodeAnalyzer.new(node).empty? }
|
12
15
|
@css_analyzer = css_analyzer
|
13
16
|
|
14
17
|
# First node of the text box indicates floating behavior
|
15
18
|
@float_node = @nodes.first
|
19
|
+
@type_node = type_node
|
16
20
|
end
|
17
21
|
|
18
22
|
# Parse the text box's nodes to get a list of sub elements
|
@@ -22,10 +26,23 @@ module ArticleJSON
|
|
22
26
|
@nodes.map { |node| parse_sub_node(node) }.compact
|
23
27
|
end
|
24
28
|
|
29
|
+
# Extract any potential tags, specified in brackets after the Textbox definition
|
30
|
+
# @return [Array[Symbol]]
|
31
|
+
def tags
|
32
|
+
match = /(.*?)[\s\u00A0]+\[(?<tags>.*)\]/
|
33
|
+
.match(@type_node.inner_text)
|
34
|
+
return [] unless match
|
35
|
+
match[:tags].split(' ')
|
36
|
+
end
|
37
|
+
|
25
38
|
# Hash representation of this text box
|
26
39
|
# @return [ArticleJSON::Elements::TextBox]
|
27
40
|
def element
|
28
|
-
ArticleJSON::Elements::TextBox.new(
|
41
|
+
ArticleJSON::Elements::TextBox.new(
|
42
|
+
float: float,
|
43
|
+
content: content,
|
44
|
+
tags: tags
|
45
|
+
)
|
29
46
|
end
|
30
47
|
|
31
48
|
private
|
data/lib/article_json/utils.rb
CHANGED
@@ -9,4 +9,5 @@ require_relative 'utils/o_embed_resolver/slideshare'
|
|
9
9
|
require_relative 'utils/o_embed_resolver/tweet'
|
10
10
|
require_relative 'utils/o_embed_resolver/vimeo_video'
|
11
11
|
require_relative 'utils/o_embed_resolver/youtube_video'
|
12
|
+
require_relative 'utils/o_embed_resolver/soundcloud'
|
12
13
|
require_relative 'utils/additional_element_placer'
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ArticleJSON
|
2
|
+
module Utils
|
3
|
+
module OEmbedResolver
|
4
|
+
class Soundcloud < Base
|
5
|
+
# Human readable name of the resolver
|
6
|
+
# @return [String]
|
7
|
+
def name
|
8
|
+
'Soundcloud'
|
9
|
+
end
|
10
|
+
|
11
|
+
# The URL for the oembed API call
|
12
|
+
# @return [String]
|
13
|
+
def oembed_url
|
14
|
+
"https://soundcloud.com/oembed?url=#{source_url}&format=json"
|
15
|
+
end
|
16
|
+
|
17
|
+
# The URL of the element
|
18
|
+
# @return [String]
|
19
|
+
def source_url
|
20
|
+
"https://soundcloud.com/#{@element.embed_id}"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/lib/article_json/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: article_json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Sager
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2020-07-31 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -111,9 +111,13 @@ files:
|
|
111
111
|
- CODE_OF_CONDUCT.md
|
112
112
|
- LICENSE
|
113
113
|
- README.md
|
114
|
+
- bin/article_json_export_amp.rb
|
115
|
+
- bin/article_json_export_facebook.rb
|
114
116
|
- bin/article_json_export_google_doc.rb
|
115
117
|
- bin/article_json_export_html.rb
|
118
|
+
- bin/article_json_export_plain_text.rb
|
116
119
|
- bin/article_json_parse_google_doc.rb
|
120
|
+
- bin/update_oembed_request-stubs.sh
|
117
121
|
- bin/update_reference_document.sh
|
118
122
|
- lib/article_json.rb
|
119
123
|
- lib/article_json/article.rb
|
@@ -185,6 +189,7 @@ files:
|
|
185
189
|
- lib/article_json/import/google_doc/html/embedded_facebook_video_parser.rb
|
186
190
|
- lib/article_json/import/google_doc/html/embedded_parser.rb
|
187
191
|
- lib/article_json/import/google_doc/html/embedded_slideshare_parser.rb
|
192
|
+
- lib/article_json/import/google_doc/html/embedded_soundcloud_parser.rb
|
188
193
|
- lib/article_json/import/google_doc/html/embedded_tweet_parser.rb
|
189
194
|
- lib/article_json/import/google_doc/html/embedded_vimeo_video_parser.rb
|
190
195
|
- lib/article_json/import/google_doc/html/embedded_youtube_video_parser.rb
|
@@ -204,6 +209,7 @@ files:
|
|
204
209
|
- lib/article_json/utils/o_embed_resolver/base.rb
|
205
210
|
- lib/article_json/utils/o_embed_resolver/facebook_video.rb
|
206
211
|
- lib/article_json/utils/o_embed_resolver/slideshare.rb
|
212
|
+
- lib/article_json/utils/o_embed_resolver/soundcloud.rb
|
207
213
|
- lib/article_json/utils/o_embed_resolver/tweet.rb
|
208
214
|
- lib/article_json/utils/o_embed_resolver/vimeo_video.rb
|
209
215
|
- lib/article_json/utils/o_embed_resolver/youtube_video.rb
|
@@ -227,8 +233,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
227
233
|
- !ruby/object:Gem::Version
|
228
234
|
version: '0'
|
229
235
|
requirements: []
|
230
|
-
|
231
|
-
rubygems_version: 2.6.12
|
236
|
+
rubygems_version: 3.0.8
|
232
237
|
signing_key:
|
233
238
|
specification_version: 4
|
234
239
|
summary: JSON Format for News Articles & Ruby Gem
|