diffbot 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -0
- data/diffbot.gemspec +1 -1
- data/lib/diffbot/article.rb +42 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c0ecd299271e20b33ba733f9a5ee42d970ada5c
|
4
|
+
data.tar.gz: 00273251dfa8c68affcd332180a28a61c0994ec8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54b92fcea1708907aac25f67e1fbace1c756b785cbdb28aaf821362426486e2cd479d0abe66bbf5e3760a0d3776edfc53e1a934ed3beb8c8dbaf509a42e26aec
|
7
|
+
data.tar.gz: 5286e54192b8d4f71ada9dd081e4251c6b7fc3a449d25f79edfda47017e997776ec6b149bea0dd2109dcece920a170d221545d6afbffb5798d9f8ca6a4722324
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -59,6 +59,19 @@ This is a list of all the fields returned by the `Diffbot::Article.fetch` call:
|
|
59
59
|
the HTML option in the request.
|
60
60
|
* `tags`: A list of tags/keywords extracted from the article.
|
61
61
|
* `xpath`: The XPath at which this article was found in the page.
|
62
|
+
* `human_language`: Returns the (spoken/human) language of the submitted URL, using two-letter ISO 639-1 nomenclature.
|
63
|
+
* `num_pages`: Number of pages automatically concatenated to form the text or html response.
|
64
|
+
* `images`: Array of images, if present within the article body.
|
65
|
+
* `url`: Direct (fully resolved) link to image.
|
66
|
+
* `pixel_height`: Image height, in pixels.
|
67
|
+
* `pixel_width`: Image width, in pixels.
|
68
|
+
* `caption`: Diffbot-determined best caption for the image, if detected.
|
69
|
+
* `primary`: Returns 'true' if image is identified as primary.
|
70
|
+
* `videos`: Array of videos, if present within the article body.
|
71
|
+
* `url`: Direct (fully resolved) link to the video content.
|
72
|
+
* `pixel_height`: Video height, in pixels, if accessible.
|
73
|
+
* `pixel_width`: Video width, in pixels, if accessible.
|
74
|
+
* `primary`: Returns "true" if the video is identified as primary.
|
62
75
|
|
63
76
|
### Options
|
64
77
|
|
data/diffbot.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "diffbot"
|
3
|
-
s.version = "0.1.4"
|
3
|
+
s.version = "0.1.5"
|
4
4
|
s.description = "Diffbot provides a concise API for analyzing and extracting semantic information from web pages using Diffbot (http://www.diffbot.com)."
|
5
5
|
s.summary = "Ruby interface to the Diffbot API "
|
6
6
|
s.authors = ["Nicolas Sanguinetti", "Roman Greshny"]
|
data/lib/diffbot/article.rb
CHANGED
@@ -63,6 +63,10 @@ module Diffbot
|
|
63
63
|
# Public: Date of the article (as a string).
|
64
64
|
property :date
|
65
65
|
|
66
|
+
# Returns the (spoken/human) language of the submitted URL, using two-letter
|
67
|
+
# ISO 639-1 nomenclature.
|
68
|
+
property :human_language, from: :humanLanguage
|
69
|
+
|
66
70
|
class MediaItem < Hashie::Trash
|
67
71
|
property :type
|
68
72
|
property :link
|
@@ -80,6 +84,25 @@ module Diffbot
|
|
80
84
|
property :media
|
81
85
|
coerce_property :media, collection: MediaItem
|
82
86
|
|
87
|
+
class ImageItem < Hashie::Trash
|
88
|
+
property :url
|
89
|
+
property :pixel_height, from: :pixelHeight
|
90
|
+
property :pixel_width, from: :pixelWidth
|
91
|
+
property :caption
|
92
|
+
property :primary
|
93
|
+
end
|
94
|
+
|
95
|
+
# Public: Array of images, if present within the article body.
|
96
|
+
# object with the following attributes
|
97
|
+
#
|
98
|
+
# url - Direct (fully resolved) link to image.
|
99
|
+
# pixel_height - Image height, in pixels.
|
100
|
+
# pixel_width - Image width, in pixels.
|
101
|
+
# caption - Diffbot-determined best caption for the image, if detected.
|
102
|
+
# primary - Returns "true" if image is identified as primary.
|
103
|
+
property :images
|
104
|
+
coerce_property :images, class: ImageItem
|
105
|
+
|
83
106
|
# Public: The raw text of the article, without formatting.
|
84
107
|
property :text
|
85
108
|
|
@@ -111,6 +134,22 @@ module Diffbot
|
|
111
134
|
|
112
135
|
property :supertags
|
113
136
|
|
137
|
+
class VideoItem < Hashie::Trash
|
138
|
+
property :url
|
139
|
+
property :pixel_height, from: :pixelHeight
|
140
|
+
property :pixel_width, from: :pixelWidth
|
141
|
+
property :primary
|
142
|
+
end
|
143
|
+
# Public: Array of videos, if present within the article body
|
144
|
+
# object with the following attributes
|
145
|
+
#
|
146
|
+
# url - Direct (fully resolved) link to the video content.
|
147
|
+
# pixel_height - Video height, in pixels, if accessible.
|
148
|
+
# pixel_width - Video width, in pixels, if accessible.
|
149
|
+
# primary - Returns "true" if the video is identified as primary.
|
150
|
+
property :videos
|
151
|
+
coerce_property :videos, class: VideoItem
|
152
|
+
|
114
153
|
class Stats < Hashie::Trash
|
115
154
|
property :fetch_time, from: :fetchTime
|
116
155
|
property :confidence
|
@@ -157,7 +196,9 @@ module Diffbot
|
|
157
196
|
# req.summary = true
|
158
197
|
# end
|
159
198
|
|
160
|
-
|
199
|
+
# Number of pages automatically concatenated to form the text or html response.
|
200
|
+
property :num_pages, from: :numPages
|
201
|
+
property :next_page, from: :nextPage
|
161
202
|
property :resolved_url
|
162
203
|
property :type
|
163
204
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: diffbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nicolas Sanguinetti
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02-
|
12
|
+
date: 2014-02-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: excon
|