whatis 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +149 -0
- data/exe/whatis +46 -0
- data/lib/whatis.rb +209 -0
- data/lib/whatis/cli.rb +38 -0
- data/lib/whatis/formatter.rb +51 -0
- data/lib/whatis/refinements.rb +34 -0
- data/lib/whatis/thisis.rb +181 -0
- data/lib/whatis/thisis/ambigous.rb +128 -0
- data/lib/whatis/thisis/link.rb +75 -0
- data/lib/whatis/thisis/notfound.rb +70 -0
- data/whatis.gemspec +55 -0
- metadata +283 -0
@@ -0,0 +1,51 @@
|
|
1
|
+
class WhatIs
  using Refinements

  # @private
  # Turns resolved objects into compact one-line summaries.
  class Formatter
    # Produces a `"<title>: <summary>"` line for one resolved object.
    #
    # @param title [String] label placed before the colon
    # @param object [ThisIs, ThisIs::Ambigous, ThisIs::NotFound] entity to summarize
    # @return [String]
    def call(title, object)
      summary = summarize(object)
      "#{title}: #{summary}"
    end

    private

    # Dispatches on the object's class; anything unrecognized yields nil,
    # which the caller interpolates as an empty summary.
    def summarize(object)
      case object
      when ThisIs then format_thisis(object)
      when ThisIs::Ambigous then format_ambigous(object)
      when ThisIs::NotFound then format_notfound(object)
      end
    end

    # Title, optional " {coordinates}" and a short description, glued together.
    def format_thisis(object)
      coords = object.coordinates&.to_s&.surround(' {', '}')
      pieces = [object.title, coords, ' - ', short_description(object)]
      pieces.join
    end

    # Picks the most specific description available, in this order:
    # categories, the only fetched translation, description, extract.
    def short_description(obj) # rubocop:disable Metrics/AbcSize
      return obj.categories.sort.join('; ') if obj.categories.any?
      return obj.languages.values.first if obj.languages.count == 1
      return obj.extract if obj.description.to_s.empty?

      obj.description
    end

    # Title, number of variants and the variants themselves on one line.
    def format_ambigous(object)
      options = object.variants
      "#{object.title}, #{options.count} options - #{options.join('; ')}"
    end

    # The object itself carries nothing worth printing here.
    def format_notfound(*_ignored)
      'not found'
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
class WhatIs
  # @private
  # Small core-class extensions, visible only in files that say `using Refinements`.
  module Refinements
    refine String do
      # Non-mutating: returns a new string with +after+ attached to the end.
      def append(after)
        surround('', after)
      end

      # Non-mutating, unlike the core String#prepend: returns a new string
      # with +before+ attached to the beginning.
      def prepend(before)
        surround(before, '')
      end

      # Wraps the string in +before+ and +after+; with one argument the same
      # text is used on both sides.
      def surround(before, after = before)
        "#{before}#{self}#{after}"
      end
    end

    refine Object do
      # Passes the receiver to the block and returns the block's result.
      def yield_self
        yield(self)
      end

      # Returns the receiver when the block is truthy for it, nil otherwise.
      def iff
        self if yield(self)
      end
    end

    refine Hash do
      # Returns a new hash whose keys are the block's results for the original
      # keys; values are kept as they are.
      def transform_keys
        each_with_object({}) do |(key, val), result|
          result[yield(key)] = val
        end
      end
    end
  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
class WhatIs
  using Refinements

  # Represents one resolved entity, provides introspection and access to individual properties.
  # You should never create instances of this class directly, but rather obtain it from {WhatIs#this}
  # and {WhatIs#these}.
  #
  # @example
  #   paris = WhatIs.this('Paris')
  #   # => #<ThisIs Paris [img] {48.856700,2.350800}>
  #   paris.describe
  #   # => Paris
  #   #          title: "Paris"
  #   #    description: "capital city of France"
  #   #    coordinates: #<Geo::Coord 48.856700,2.350800>
  #   #        extract: "Paris (French pronunciation: [paʁi] ( listen)) is the capital and most populous city of France, with an administrative-limits area of 105 square kilometres (41 square miles) and a 2015 population of 2,229,621."
  #   #          image: "https://upload.wikimedia.org/wikipedia/commons/0/08/Seine_and_Eiffel_Tower_from_Tour_Saint_Jacques_2013-08.JPG"
  #   #
  #   paris.coordinates
  #   # => #<Geo::Coord 48.856700,2.350800>
  #   paris2 = paris.what(languages: :ru, categories: true) # fetch more details
  #   # => #<ThisIs Paris/Париж, 12 categories [img] {48.856700,2.350800}>
  #   paris2.describe
  #   # => Paris
  #   #          title: "Paris"
  #   #    description: "capital city of France"
  #   #    coordinates: #<Geo::Coord 48.856700,2.350800>
  #   #     categories: ["3rd-century BC establishments", "Capitals in Europe", "Catholic pilgrimage sites", "Cities in France", "Cities in Île-de-France", "Companions of the Liberation", "Departments of Île-de-France", "European culture", "French culture", "Paris", "Populated places established in the 3rd century BC", "Prefectures in France"]
  #   #      languages: {"ru"=>#<ThisIs::Link ru:Париж>}
  #   #        extract: "Paris (French pronunciation: [paʁi] ( listen)) is the capital and most populous city of France, with an administrative-limits area of 105 square kilometres (41 square miles) and a 2015 population of 2,229,621."
  #   #          image: "https://upload.wikimedia.org/wikipedia/commons/0/08/Seine_and_Eiffel_Tower_from_Tour_Saint_Jacques_2013-08.JPG"
  #   paris2.languages['ru'].resolve(categories: true)
  #   # => #<ThisIs Париж, 10 categories [img] {48.833333,2.333333}>
  #
  # See also:
  #
  # * {ThisIs::Ambigous} Representing disambiguation page, allows fetching variants.
  # * {ThisIs::NotFound} Representing not found entity, allows searching for possible options.
  class ThisIs
    # @private
    # One lambda per public property; each receives the fetched page and returns the property value.
    EXTRACTORS = {
      title: ->(page) { page.title },
      description: ->(page) { page.source.dig('terms', 'description', 0) },
      coordinates: ->(page) {
        # No coordinates in the response => no value (`return` only leaves the lambda).
        coord = page.source['coordinates']&.first or return nil
        Geo::Coord.from_h(coord)
      },
      categories: ->(page) {
        Array(page.source['categories'])
          .reject { |c| c.key?('hidden') }
          .map { |c| c['title'].split(':', 2).last } # drop the namespace prefix before the first ":"
      },
      languages: ->(page) {
        Array(page.source['langlinks'])
          .map { |l| [l['lang'], Link.new(l['*'], language: l['lang'])] }
          .to_h
      },
      extract: ->(page) {
        # remove HTML tags
        # NB: Wikipedia "extracts" submodule has "plaintext=true" option, but it produces wrong 1-sentence
        # extracts (broken by first ".", which can be somewhere in transcription of the main entity).
        # HTML extracts, on the other hand, return proper sentences
        #
        # Link: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bextracts
        page.source['extract']&.gsub(/<[^>]+>/, '')&.strip
      },
      image: ->(page) { page.source.dig('original', 'source') }
    }.freeze

    # @private
    # Chooses the wrapper for a fetched page: {NotFound} when there is no page, {Ambigous} when
    # any of the page's categories is listed in the owner's `ambigous_categories`, {ThisIs} otherwise.
    def self.create(owner, title, page)
      case
      when page.nil?
        NotFound.new(owner, title)
      when Array(page.source['categories']).any? { |c| owner.ambigous_categories.include?(c['title']) }
        Ambigous.new(owner, page)
      else
        new(owner, page)
      end
    end

    # Original [Infoboxer page](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki/Page) data.
    # @return [Infoboxer::MediaWiki::Page]
    attr_reader :page

    # @private
    # All extractors are run once, at construction; readers below only look the results up.
    def initialize(owner, page)
      @owner = owner
      @page = page
      @data = EXTRACTORS.map { |sym, proc| [sym, proc.call(page)] }.to_h
    end

    # @!method title
    #   Title of Wikipedia page
    #   @return [String]
    # @!method description
    #   Short entity description phrase from Wikidata. Not always present.
    #   @return [String, nil]
    # @!method extract
    #   First sentence of Wikipedia page
    #   @return [String, nil]
    # @!method coordinates
    #   Geographical coordinates, associated with the page, if known, wrapped in
    #   [Geo::Coord](https://github.com/zverok/geo_coord) type.
    #   @return [Geo::Coord, nil]
    # @!method image
    #   URL of page's main image, if known.
    #   @return [String, nil]
    # @!method categories
    #   List of page's categories, present only if page was fetched with `categories: true` option.
    #   @return [Array<String>]
    # @!method languages
    #   Hash of other language version of page. Present only if the page was fetched with `:languages`
    #   option. Keys are language codes, values are {ThisIs::Link} objects, allowing to fetch corresponding
    #   entities with {ThisIs::Link#resolve}.
    #   @return [Hash{String => ThisIs::Link}]

    EXTRACTORS.each_key { |name| define_method(name) { @data[name] } }

    alias to_s title

    # One-line summary: title, translation info, category count, image marker and coordinates;
    # parts that have no value are left out.
    #
    # @return [String]
    def inspect # rubocop:disable Metrics/AbcSize
      [
        'ThisIs ',
        title,
        languages.iff { |l| l.count == 1 }&.yield_self { |l| l.values.first.title.prepend('/') },
        languages.iff { |l| l.count > 1 }&.yield_self { |l| " +#{l.count} translations" },
        categories.iff(&:any?)&.yield_self { |c| ", #{c.count} categories" },
        image&.yield_self { ' [img]' },
        coordinates&.to_s&.surround(' {', '}')
      ].compact.join.surround('#<', '>')
    end

    # Multi-line listing of all non-empty properties. Any arguments are accepted and ignored.
    #
    # @return [Description]
    def describe(*)
      maxlength = @data.keys.map(&:length).max
      Description.new(
        "#{self}\n" +
          clean_data
            .map { |k, v| "  #{k.to_s.rjust(maxlength)}: #{v.inspect}" }.join("\n")
      )
    end

    # Plain-data representation: coordinates and language links are converted to strings,
    # nil and empty values are dropped.
    #
    # @return [Hash]
    def to_h
      {type: 'ThisIs'} # To be at the beginning of a hash
        .merge(@data)
        .merge(
          coordinates: coordinates&.to_s,
          languages: languages.transform_values(&:to_s)
        ).reject { |_, v| empty_value?(v) }
    end

    # Serializes {#to_h}. Generator arguments are optional, so both a direct `to_json` call and
    # calls made by the JSON generator (which passes its state) work.
    #
    # @return [String]
    def to_json(*opts)
      to_h.to_json(*opts)
    end

    # Refetch page with more data, see {WhatIs#this} for options explanation. Returns new object.
    #
    # @param options [Hash]
    # @option options [true, String, Symbol] :languages
    # @option options [true, false] :categories
    # @return [ThisIs]
    def what(**options)
      @owner.this(title, **options)
    end

    private

    # Extracted properties without nil/empty values.
    def clean_data
      @data.reject { |_, v| empty_value?(v) }
    end

    # True for nil and for anything that reports itself as empty.
    def empty_value?(value)
      value.nil? || (value.respond_to?(:empty?) && value.empty?)
    end
  end
end
|
178
|
+
|
179
|
+
require_relative 'thisis/ambigous'
|
180
|
+
require_relative 'thisis/notfound'
|
181
|
+
require_relative 'thisis/link'
|
@@ -0,0 +1,128 @@
|
|
1
|
+
class WhatIs
  class ThisIs
    # Represents disambiguation page.
    #
    # You should never create instances of this class directly, but rather obtain it from {WhatIs#this}
    # and {WhatIs#these}.
    #
    # `Ambigous` consists of {#variants}, each of them represented by a {ThisIs::Link} which can
    # be {ThisIs::Link#resolve resolved}.
    #
    # @note This functionality (special wrapper for disambiguation
    #   pages) works only for those language Wikis which have their "disambiguation" category known
    #   to `WhatIs`. See {WhatIs::AMBIGOUS_CATEGORIES}.
    #
    # @example
    #   a = WhatIs.this('Bela Crkva')
    #   # => #<ThisIs::Ambigous Bela Crkva (6 options)>
    #   a.describe
    #   # => Bela Crkva: ambigous (6 options)
    #   #   #<ThisIs::Link Bela Crkva, Banat>: Bela Crkva, Banat, a town in Vojvodina, Serbia
    #   #   #<ThisIs::Link Bela Crkva, Krivogaštani>: Bela Crkva, Krivogaštani, a village in the Municipality of Krivogaštani, Macedonia
    #   #   #<ThisIs::Link Bela Crkva (Krupanj)>: Bela Crkva (Krupanj), a village in the Mačva District of Serbia
    #   #   #<ThisIs::Link Toplička Bela Crkva>: Toplička Bela Crkva, original name of the city of Kuršumlija, Serbia
    #   #   #<ThisIs::Link See also/Bila Tserkva>: Bila Tserkva (Біла Церква), a city in the Kiev Oblast of Ukraine
    #   #   #<ThisIs::Link See also/Byala Cherkva>: Byala Cherkva, a town in the Veliko Turnovo oblast of Bulgaria
    #   #
    #   #   Usage: .variants[0].resolve, .resolve_all
    #
    #   a.variants[0]
    #   # => #<ThisIs::Link Bela Crkva, Banat>
    #   a.variants[0].resolve
    #   # => #<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>
    #   a.variants[0].resolve(categories: true)
    #   # => #<ThisIs Bela Crkva, Banat, 5 categories [img] {44.897500,21.416944}>
    #   a.resolve_all
    #   # => {"Bela Crkva, Banat"=>#<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>, "Bela Crkva, Krivogaštani"=>#<ThisIs Bela Crkva, Krivogaštani {41.280833,21.345278}>, "Bela Crkva (Krupanj)"=>#<ThisIs Bela Crkva (Krupanj) [img] {44.395000,19.479400}>, "Toplička Bela Crkva"=>#<ThisIs Kuršumlija [img] {43.150000,21.266667}>, "Bila Tserkva"=>#<ThisIs Bila Tserkva [img] {49.798889,30.115278}>, "Byala Cherkva"=>#<ThisIs Byala Cherkva [img] {43.200000,25.300000}>}
    #
    class Ambigous
      # Original [Infoboxer page](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki/Page) data.
      # @return [Infoboxer::MediaWiki::Page]
      attr_reader :page

      # Each link can be {ThisIs::Link#resolve resolved} individually, like
      # `ambigous.variants[0].resolve`, or you can resolve them all at once with {#resolve_all}.
      #
      # @return [Array<ThisIs::Link>]
      attr_reader :variants

      # @private
      # Variants are extracted from the page once, at construction.
      def initialize(owner, page)
        @owner = owner
        @page = page
        @variants = extract_variants
      end

      # Title of the disambiguation page itself.
      #
      # @return [String]
      def title
        page.title
      end

      # @return [String]
      def inspect
        "#<ThisIs::Ambigous #{title} (#{variants.count} options)>"
      end

      # @return [String]
      def to_s
        "#{title}: ambigous (#{variants.count} options)"
      end

      # Plain-data representation; variants are reduced to their string form.
      #
      # @return [Hash]
      def to_h
        {
          type: 'ThisIs::Ambigous',
          title: title,
          variants: variants.map(&:to_s)
        }
      end

      # Serializes {#to_h}. Generator arguments are optional, so both a direct `to_json` call and
      # calls made by the JSON generator (which passes its state) work.
      #
      # @return [String]
      def to_json(*opts)
        to_h.to_json(*opts)
      end

      # Multi-line listing: header, one line per variant, and (optionally) a usage hint.
      #
      # @param help [true, false] whether to append the usage hint
      # @return [Description]
      def describe(help: true)
        Description.new(
          "#{self}\n" +
            variants.map { |link| "  #{link.inspect}: #{link.description}" }.join("\n") +
            describe_help(help)
        )
      end

      # Resolves all ambigous variants with one query.
      # See {WhatIs#this} for options explanation.
      #
      # @param options [Hash]
      # @option options [true, String, Symbol] :languages
      # @option options [true, false] :categories
      # @return [Hash{String => ThisIs}]
      def resolve_all(**options)
        @owner.these(*variants.map(&:title), **options)
      end

      private

      # Usage hint appended to {#describe} output; empty string when not requested.
      def describe_help(render = true)
        return '' unless render

        "\n\n  Usage: .variants[0].resolve, .resolve_all"
      end

      # Every list item of the page that contains at least one wikilink becomes a variant.
      def extract_variants
        page.wikipath('//ListItem')
            .reject { |item| item.wikilinks.empty? }
            .map { |item| item_to_link(item) }
      end

      # Builds a link from the item's first wikilink; the section path is assembled from the
      # headings of the sections containing the item, and the item's text serves as description.
      def item_to_link(item)
        Link.new(
          item.wikilinks.first.link,
          owner: @owner,
          section: item.in_sections.map(&:heading).map(&:text_).reverse.reject(&:empty?).join('/'),
          description: item.children.map(&:text).join
        )
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
class WhatIs
  using Refinements

  class ThisIs
    # A reference to an entity that has not been fetched yet and can be resolved into a full one.
    #
    # Not meant to be instantiated by hand: links show up as variants of an {Ambigous} page
    # and as values of {ThisIs::languages}.
    #
    # @example
    #   # Ambigous variants link
    #   a = WhatIs.this('Bela Crkva')
    #   # => #<ThisIs::Ambigous Bela Crkva (6 options)>
    #   a.variants[0]
    #   # => #<ThisIs::Link Bela Crkva, Banat>
    #   a.variants[0].resolve
    #   # => #<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>
    #
    #   # Languages link
    #   paris = WhatIs.this('Paris', languages: :ru)
    #   # => #<ThisIs Paris/Париж, [img] {48.856700,2.350800}>
    #   paris.languages
    #   # => {"ru"=>#<ThisIs::Link ru:Париж>}
    #   paris.languages['ru'].resolve(categories: true)
    #   # => #<ThisIs Париж, 10 categories [img] {48.833333,2.333333}>
    #
    class Link
      # @return [String]
      attr_reader :title
      # @private
      attr_reader :language

      # @private
      # Used for pretty output only
      attr_reader :section, :description

      # @private
      def initialize(title, section: nil, owner: nil, language: nil, description: nil)
        @title = title
        @owner = owner
        @description = description
        @language = language&.to_s
        # An empty section path is treated as "no section".
        @section = section if section != ''
      end

      # Title, prefixed with "language:" and "section/" when those are known.
      #
      # @return [String]
      def inspect
        prefix = "#{language&.append(':')}#{section&.append('/')}"
        "#<ThisIs::Link #{prefix}#{title}>"
      end

      alias to_s title

      # Fetches the entity this link points to from Wikipedia API.
      #
      # The owner given at construction is used when present, otherwise the engine is looked up
      # by the link's language; with neither available the link cannot be resolved.
      #
      # See {WhatIs#this} for options explanation.
      #
      # @param options [Hash]
      # @option options [true, String, Symbol] :languages
      # @option options [true, false] :categories
      # @return [ThisIs, ThisIs::Ambigous]
      def resolve(**options)
        engine = @owner || (language && WhatIs[language])
        fail "Can't resolve #{inspect}" unless engine

        engine.this(title, **options)
      end

      # @private
      # Exists for tests only: links are equal when language and title match.
      def ==(other)
        return false unless other.is_a?(Link)

        other.language == language && other.title == title
      end
    end
  end
end
|