whatis 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +149 -0
- data/exe/whatis +46 -0
- data/lib/whatis.rb +209 -0
- data/lib/whatis/cli.rb +38 -0
- data/lib/whatis/formatter.rb +51 -0
- data/lib/whatis/refinements.rb +34 -0
- data/lib/whatis/thisis.rb +181 -0
- data/lib/whatis/thisis/ambigous.rb +128 -0
- data/lib/whatis/thisis/link.rb +75 -0
- data/lib/whatis/thisis/notfound.rb +70 -0
- data/whatis.gemspec +55 -0
- metadata +283 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0bc1cdd23a45a493b8ef361682d38309bcc8758f
|
4
|
+
data.tar.gz: 9d6b933c9d46756ed745b569cf6c40db265befd5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b64b7110f7d840a12e350523712655d138848efcef71932fae5bfc0e5e54b7ee37c4f14fced08a32cfb7caafd42a78206ed9ab5d02dd1822e3d5bffa51229dea
|
7
|
+
data.tar.gz: 9d9e5e4bf040acd0b4d0f58b1d2d443f6f6020b7fd19fc68ba0a99519e0906493774ccb75ff7a6394d4a9915f3c1c63c0d6021a1cd26f2224d51b34ee9d4a10f
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014-15 Victor 'Zverok' Shepelev
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
# WhatIs.this
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/whatis.svg)](http://badge.fury.io/rb/whatis)
|
4
|
+
[![Build Status](https://travis-ci.org/molybdenum-99/whatis.svg?branch=master)](https://travis-ci.org/molybdenum-99/whatis)
|
5
|
+
|
6
|
+
**WhatIs.this** is a quick probe for the meaning and metadata of concepts through Wikipedia.
|
7
|
+
|
8
|
+
## Showcase
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
require 'whatis'
|
12
|
+
|
13
|
+
sparta = WhatIs.this('Sparta')
|
14
|
+
# => #<ThisIs Sparta [img] {37.081944,22.423611}>
|
15
|
+
sparta.coordinates
|
16
|
+
# => #<Geo::Coord 37.081944,22.423611>
|
17
|
+
sparta.image
|
18
|
+
# => "https://upload.wikimedia.org/wikipedia/commons/6/6c/Sparta_territory.jpg"
|
19
|
+
|
20
|
+
sparta.describe
|
21
|
+
# => Sparta
|
22
|
+
# title: "Sparta"
|
23
|
+
# description: "city-state in ancient Greece"
|
24
|
+
# coordinates: #<Geo::Coord 37.081944,22.423611>
|
25
|
+
# extract: "Sparta (Doric Greek: ; Attic Greek: ) was a prominent city-state in ancient Greece."
|
26
|
+
# image: "https://upload.wikimedia.org/wikipedia/commons/6/6c/Sparta_territory.jpg"
|
27
|
+
|
28
|
+
# Fetch additional information: categories & translations:
|
29
|
+
sparta = WhatIs.this('Sparta', categories: true, languages: 'el')
|
30
|
+
# => #<ThisIs Sparta/Αρχαία Σπάρτη, 7 categories [img] {37.081944,22.423611}>
|
31
|
+
sparta.describe
|
32
|
+
# => Sparta
|
33
|
+
# title: "Sparta"
|
34
|
+
# description: "city-state in ancient Greece"
|
35
|
+
# coordinates: #<Geo::Coord 37.081944,22.423611>
|
36
|
+
# categories: ["Former countries in Europe", "Former populated places in Greece", "Locations in Greek mythology", "Populated places in Laconia", "Sparta", "States and territories disestablished in the 2nd century BC", "States and territories established in the 11th century BC"]
|
37
|
+
# languages: {"el"=>#<ThisIs::Link el:Αρχαία Σπάρτη>}
|
38
|
+
# extract: "Sparta (Doric Greek: ; Attic Greek: ) was a prominent city-state in ancient Greece."
|
39
|
+
# image: "https://upload.wikimedia.org/wikipedia/commons/6/6c/Sparta_territory.jpg"
|
40
|
+
|
41
|
+
sparta.languages['el'].resolve
|
42
|
+
# => #<ThisIs Αρχαία Σπάρτη [img]>
|
43
|
+
|
44
|
+
# Multiple entities at once:
|
45
|
+
WhatIs.these('Paris', 'Berlin', 'Rome', 'Athens')
|
46
|
+
# => {
|
47
|
+
# "Paris"=>#<ThisIs Paris [img] {48.856700,2.350800}>,
|
48
|
+
# "Berlin"=>#<ThisIs Berlin [img] {52.516667,13.388889}>,
|
49
|
+
# "Rome"=>#<ThisIs Rome [img] {41.900000,12.500000}>,
|
50
|
+
# "Athens"=>#<ThisIs Athens [img] {37.983972,23.727806}>
|
51
|
+
# }
|
52
|
+
```
|
53
|
+
## Applications
|
54
|
+
|
55
|
+
The gem is intended to be a simple tool for entity resolution/normalization. Possible usages:
|
56
|
+
|
57
|
+
* You have a lot of user-entered answers to "What city are you from". Through `WhatIs.these` it is
|
58
|
+
pretty easy to resolve them to "canonical" city name (e.g. "Warsaw", "Warszawa", "Warsaw, Poland" =>
|
59
|
+
"Warsaw") and map locations;
|
60
|
+
* Quick check on user-entered cultural objects, "what is it";
|
61
|
+
* Canonical Wikipedia-powered translations of toponyms, movie titles and historical people;
|
62
|
+
* ...and so on.
|
63
|
+
|
64
|
+
## Features/problems
|
65
|
+
|
66
|
+
* Fetches Wikipedia data by entity names: canonical title, geographical coordinates, main page image,
|
67
|
+
the first phrase, short entity description from Wikidata;
|
68
|
+
* Optionally fetches links to other Wikipedia languages and list of page categories;
|
69
|
+
* Fetches any number of Wikipedia pages in a minimal number of API requests (50-page batches);
|
70
|
+
* Note that despite this optimization, Wikipedia API responses are not very small, so resolving,
|
71
|
+
say, 1000 entities, will errrm _take some time_;
|
72
|
+
* Works with any language version of Wikipedia:
|
73
|
+
```ruby
|
74
|
+
WhatIs[:de].this('München')
|
75
|
+
# => #<ThisIs München [img] {48.137222,11.575556}>
|
76
|
+
```
|
77
|
+
* Handles not-found pages and allows searching for them in place:
|
78
|
+
```ruby
|
79
|
+
g = WhatIs.this('Guardians Of The Galaxy') # Wikipedia page titles are case-sensitive
|
80
|
+
# => #<ThisIs::NotFound Guardians Of The Galaxy>
|
81
|
+
g.search(3)
|
82
|
+
# => [#<ThisIs::Ambigous Guardians of the Galaxy (11 options)>, #<ThisIs Guardians of the Galaxy (film)>, #<ThisIs Guardians of the Galaxy Vol. 2>]
|
83
|
+
```
|
84
|
+
* Handles disambiguation pages:
|
85
|
+
```ruby
|
86
|
+
g = WhatIs.this('Guardians of the Galaxy')
|
87
|
+
# => #<ThisIs::Ambigous Guardians of the Galaxy (11 options)>
|
88
|
+
g.describe
|
89
|
+
# => Guardians of the Galaxy: ambigous (11 options)
|
90
|
+
# #<ThisIs::Link Marvel Comics teams/Guardians of the Galaxy (1969 team)>: Guardians of the Galaxy (1969 team), the original 31st-century team from an alternative timeline of the Marvel Universe (Earth-691)
|
91
|
+
# #<ThisIs::Link Marvel Comics teams/Guardians of the Galaxy (2008 team)>: Guardians of the Galaxy (2008 team), the modern version of the team formed in the aftermath of Annihilation: Conquest
|
92
|
+
# <...skip...>
|
93
|
+
# Usage: .variants[0].resolve, .resolve_all
|
94
|
+
g.variants[1].resolve(categories: true)
|
95
|
+
# => #<ThisIs Guardians of the Galaxy (2008 team), 13 categories>
|
96
|
+
```
|
97
|
+
* Provides a command-line tool:
|
98
|
+
```
|
99
|
+
$ whatis Paris Berlin Rome
|
100
|
+
Paris: Paris {48.856700,2.350800} - capital city of France
|
101
|
+
Berlin: Berlin {52.516667,13.388889} - capital city of Germany
|
102
|
+
Rome: Rome {41.900000,12.500000} - capital city of Italy
|
103
|
+
|
104
|
+
$ whatis --help
|
105
|
+
Usage: `whatis [options] title1, title2, title3
|
106
|
+
|
107
|
+
Options:
|
108
|
+
-l, --language CODE Which language Wikipedia to ask, 2-letter code. "en" by default
|
109
|
+
-t, --languages [CODE] Without argument, fetches all translations for entity.
|
110
|
+
With argument (two-letter code) fetches only one translation.
|
111
|
+
By default, no translations are fetched.
|
112
|
+
--categories Whether to fetch entity categories
|
113
|
+
-f, --format FORMAT Output format: one line per entity ("short"), several lines per
|
114
|
+
entity ("long"), or "json". Default is "short".
|
115
|
+
-h, --help Show this message
|
116
|
+
```
|
117
|
+
|
118
|
+
### Note on disambiguation pages
|
119
|
+
|
120
|
+
Unfortunately, Wikipedia does not provide a consistent way to tell disambiguation pages from others,
|
121
|
+
the only way to know is to look at the page's categories (different for different languages). Therefore,
|
122
|
+
disambiguation currently works for English, Ukrainian, Russian and Belarusian. Feel free
|
123
|
+
to contribute disambiguation categories for your language versions!
|
124
|
+
|
125
|
+
## Usage
|
126
|
+
|
127
|
+
`gem install whatis` or add `gem "whatis"` to your `Gemfile`.
|
128
|
+
|
129
|
+
Then use it as a library (see docs for [WhatIs](http://www.rubydoc.info/gems/whatis/WhatIs) and its methods)
|
130
|
+
or as a command-line tool (try `$ whatis --help`).
|
131
|
+
|
132
|
+
## How it works
|
133
|
+
|
134
|
+
`WhatIs.this` is the little brother of the larger [reality](https://github.com/molybdenum-99/reality). Under
|
135
|
+
the hood, it uses the [infoboxer](https://github.com/molybdenum-99/infoboxer) semantic Wikipedia client.
|
136
|
+
|
137
|
+
Most of the information is taken from API response metadata, but for some features (ambiguities
|
138
|
+
resolution), the Wikipedia page is actually parsed.
|
139
|
+
|
140
|
+
Unlike `reality` (which tries to be _comprehensive_), `WhatIs.this` tries to be as simple yet useful,
|
141
|
+
as possible.
|
142
|
+
|
143
|
+
## Author
|
144
|
+
|
145
|
+
[Victor Shepelev](http://zverok.github.io)
|
146
|
+
|
147
|
+
## License
|
148
|
+
|
149
|
+
MIT
|
data/exe/whatis
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end for WhatIs: parses options, then prints whatever
# WhatIs::CLI#run returns for the titles given as arguments.
require_relative '../lib/whatis'
require_relative '../lib/whatis/cli'
require 'optparse'
require 'ostruct'

# Defaults; `languages` and `categories` stay unset (nil) unless requested.
options = OpenStruct.new
options.language = 'en'
options.format = 'short'

parser = OptionParser.new do |opts|
  opts.banner = 'Usage: `whatis [options] title1, title2, title3'

  opts.separator ''
  opts.separator 'Options:'

  opts.on('-l', '--language CODE', 'Which language Wikipedia to ask, 2-letter code. "en" by default') do |lang|
    options.language = lang
  end

  # The argument is optional: a bare `-t` stores `true`, `-t CODE` stores the code.
  opts.on('-t', '--languages [CODE]', 'Without argument, fetches all translations for entity. With argument (two-letter code) fetches only one translation. By default, no translations are fetched.') do |langs|
    options.languages = langs || true
  end

  opts.on('--categories', 'Whether to fetch entity categories') do
    options.categories = true
  end

  # OptionParser itself rejects any value outside the listed formats.
  opts.on('-f', '--format FORMAT', %w[short long json], 'Output format: one line per entity ("short"), several lines per entity ("long"), "json". Default is "short"') do |format|
    options.format = format
  end

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
end

# Report bad options (unknown switch, missing argument, unsupported format)
# as a short message plus usage on stderr instead of an uncaught backtrace.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  warn e.message
  warn parser.help
  exit 1
end

# After parse!, ARGV holds only the titles; with none there is nothing to do.
if ARGV.empty?
  puts parser
  exit
end

puts WhatIs::CLI.new(ARGV, options).run
|
data/lib/whatis.rb
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'infoboxer'
|
2
|
+
require 'geo/coord'
|
3
|
+
require 'backports/2.4.0/hash/transform_values'
|
4
|
+
|
5
|
+
# `WhatIs` is a simple entity resolver through Wikipedia.
|
6
|
+
#
|
7
|
+
# @example
|
8
|
+
# # Simplest usage
|
9
|
+
# WhatIs.this('Sparta') # => #<ThisIs Sparta [img] {37.081944,22.423611}>
|
10
|
+
#
|
11
|
+
# # Additional options
|
12
|
+
# WhatIs.this('Sparta', languages: :el, categories: true)
|
13
|
+
# # => #<ThisIs Sparta/Αρχαία Σπάρτη, 7 categories [img] {37.081944,22.423611}>
|
14
|
+
#
|
15
|
+
# # Several pages at once (in batch requests to Wikipedia API)
|
16
|
+
# WhatIs.these('Paris', 'Athens', 'Rome')
|
17
|
+
# # => {"Paris"=>#<ThisIs Paris [img] {48.856700,2.350800}>, "Athens"=>#<ThisIs Athens [img] {37.983972,23.727806}>, "Rome"=>#<ThisIs Rome [img] {41.900000,12.500000}>}
|
18
|
+
#
|
19
|
+
# # Other language Wikipedia
|
20
|
+
# WhatIs[:ru].this('Спарта') # => #<ThisIs Спарта [img]>
|
21
|
+
#
|
22
|
+
# See {#this} and {#these} methods docs for details on call sequence and options, and response classes:
|
23
|
+
#
|
24
|
+
# * {ThisIs} -- normal response object;
|
25
|
+
# * {ThisIs::Ambigous} -- response object representing disambiguation page;
|
26
|
+
# * {ThisIs::NotFound} -- response object for not found page, includes search for term service.
|
27
|
+
#
|
28
|
+
class WhatIs
  # This constant lists Wikipedia ambiguity categories per Wikipedia language. For the
  # {ThisIs::Ambigous} feature to work for your language, this list should include it.
  AMBIGOUS_CATEGORIES = {
    be: ['Катэгорыя:Неадназначнасці'],
    en: ['Category:All disambiguation pages', 'Category:All set index articles'],
    ru: ['Категория:Страницы значений по алфавиту'],
    uk: ['Категорія:Всі статті визначеного індексу', 'Категорія:Всі сторінки неоднозначності статей']
  }.freeze

  # String-like class whose only difference from String is how #inspect is represented.
  #
  # Used by the {ThisIs#describe} method so its answer is readable in a Ruby console (IRB or Pry).
  #
  # @example
  #   "foo\nbar"
  #   # => "foo\nbar"
  #   Description.new("foo\nbar")
  #   # => foo
  #   #    bar
  #
  class Description < String
    alias inspect to_s # Allows pretty inspect of multi-line descriptions
  end

  class << self
    # Returns the resolver for the given Wikipedia language. Instances are cached per
    # language code, so repeated calls with the same code return the same object.
    #
    # @param lang [Symbol, String] Wikipedia version language code, usually two-letter ("en", "fr"),
    #   but not for all languages (for example, "be-x-old" or "zh-classical").
    #
    # @return [WhatIs]
    def [](lang)
      all[lang.to_s]
    end

    # Shortcut for `WhatIs[:en].these`, see {#these} for details.
    # @param titles [Array<String>] Titles of entities to resolve.
    # @option options [true, String, Symbol] :languages If `true`, fetches all titles of languages versions
    #   of entity resolved; if some language code (like "ru" or "zh-classical") is passed, fetches only
    #   this language's title, the latter is faster.
    # @option options [true, false] :categories Fetch entity's categories. Due to how Wikipedia's API
    #   works, this may lead to additional API calls, so use with caution.
    # @return [Hash{String => ThisIs, ThisIs::Ambigous, ThisIs::NotFound}] Hash keys are original
    #   title requested, so it is easy to find how particular title was resolved.
    def these(*titles, **options)
      self[:en].these(*titles, **options)
    end

    # Shortcut for `WhatIs[:en].this`, see {#this} for details.
    # @param title [String] Title of the entity to resolve.
    # @param options [Hash]
    # @option options [true, String, Symbol] :languages If `true`, fetches all titles of languages versions
    #   of entity resolved; if some language code (like "ru" or "zh-classical") is passed, fetches only
    #   this language's title, the latter is faster.
    # @option options [true, false] :categories Fetch entity's categories. Due to how Wikipedia's API
    #   works, this may lead to additional API calls, so use with caution.
    # @return [ThisIs, ThisIs::Ambigous, ThisIs::NotFound]
    def this(title, **options)
      self[:en].this(title, **options)
    end

    private

    # Per-language instance cache; a missing key builds and stores a new WhatIs.
    def all
      @all ||= Hash.new { |h, lang| h[lang] = WhatIs.new(lang) }
    end
  end

  # @private
  attr_reader :language

  # @private
  def initialize(language = :en)
    @language = language
    @infoboxer = Infoboxer.wikipedia(language)
  end

  # Batch resolving of several entities. Wikipedia API allows batch fetching of pages, so there would
  # be roughly 1 API call for each 50 entities. Number of API calls could be larger if additional
  # information (languages, categories) is requested.
  #
  # @param titles [Array<String>] Titles of entities to resolve.
  # @option options [true, String, Symbol] :languages If `true`, fetches all titles of languages versions
  #   of entity resolved; if some language code (like "ru" or "zh-classical") is passed, fetches only
  #   this language's title, the latter is faster.
  # @option options [true, false] :categories Fetch entity's categories. Due to how Wikipedia's API
  #   works, this may lead to additional API calls, so use with caution.
  # @return [Hash{String => ThisIs, ThisIs::Ambigous, ThisIs::NotFound}] Hash keys are original
  #   title requested, so it is easy to find how particular title was resolved.
  # @raise [ArgumentError] if no title is given.
  # @see #this #this: singular entity resolution.
  # @example
  #   WhatIs.these('Warszawa', 'Warsaw', 'Berlin', 'Kharkov', 'Bela Crkva', 'Hrmpf', languages: :fr)
  #   # => {
  #   #  "Warszawa" => #<ThisIs Warsaw/Varsovie [img] {52.233333,21.016667}>,
  #   #  "Warsaw" => #<ThisIs Warsaw/Varsovie [img] {52.233333,21.016667}>,
  #   #  "Berlin" => #<ThisIs Berlin/Berlin [img] {52.516667,13.388889}>,
  #   #  "Kharkov" => #<ThisIs Kharkiv/Kharkiv [img] {50.004444,36.231389}>,
  #   #  "Bela Crkva" => #<ThisIs::Ambigous Bela Crkva (6 options)>,
  #   #  "Hrmpf" => #<ThisIs::NotFound Hrmpf>
  #   # }
  def these(*titles, **options)
    unless titles.any?
      raise ArgumentError,
            "Usage: `these('Title 1', 'Title 2', ..., **options). At least one title is required."
    end

    @infoboxer
      .get_h(*titles) { |req| setup_request(req, **options) }
      .map { |title, page| [title, ThisIs.create(self, title, page)] }
      .to_h
  end

  # Resolves one entity through Wikipedia API.
  #
  # @param title [String] Title of the entity to resolve.
  # @param options [Hash]
  # @option options [true, String, Symbol] :languages If `true`, fetches all titles of languages versions
  #   of entity resolved; if some language code (like "ru" or "zh-classical") is passed, fetches only
  #   this language's title, the latter is faster.
  # @option options [true, false] :categories Fetch entity's categories. Due to how Wikipedia's API
  #   works, this may lead to additional API calls, so use with caution.
  # @return [ThisIs, ThisIs::Ambigous, ThisIs::NotFound]
  # @note Special {ThisIs::Ambigous} wrapper for disambiguation pages can be created only for those
  #   language Wikipedias which `WhatIs` knows "disambiguation" category name, see {AMBIGOUS_CATEGORIES}.
  # @see #these #these: batch fetching several entities.
  # @example
  #   WhatIs.this('Warszawa', languages: :fr)
  #   # => #<ThisIs Warsaw/Varsovie [img] {52.233333,21.016667}>
  #   WhatIs.this('Bela Crkva', languages: :fr)
  #   # => #<ThisIs::Ambigous Bela Crkva (6 options)>
  #   WhatIs.this('Hrmpf', languages: :fr)
  #   # => #<ThisIs::NotFound Hrmpf>
  def this(title, **options)
    these(title, **options).values.first
  end

  # @return [String]
  def inspect
    "#<WhatIs(#{language}). Usage: .this(*pages, **options)>"
  end

  # @private
  # Used by {ThisIs::NotFound#search}
  def search(title, limit = 5, **options)
    @infoboxer
      .search(title, limit: limit) { |req| setup_request(req, **options) }
      .map { |page| ThisIs.create(self, page.title, page) }
  end

  # @private
  # Disambiguation category names known for this instance's language; empty when the
  # language is not listed in {AMBIGOUS_CATEGORIES}. Memoized per instance.
  def ambigous_categories
    @ambigous_categories ||= AMBIGOUS_CATEGORIES.fetch(language.to_sym, [])
  end

  private

  # Adds to the request the page properties always fetched (coordinates, one-sentence
  # extract, original page image, page terms), then the optional categories/langlinks.
  # Unknown keyword options are accepted and ignored.
  def setup_request(request, categories: false, languages: false, **) # rubocop:disable Metrics/MethodLength
    request = request
              .prop(:coordinates)
              .prop(:extracts).sentences(1)
              .prop(:pageimages).prop(:original)
              .prop(:pageterms)

    request = setup_categories(request, categories)
    if languages
      request = request.prop(:langlinks)
      # `true` means "all languages"; anything else narrows langlinks to that one code.
      request = request.lang(languages) unless languages == true
    end
    request
  end

  # Chooses how categories are requested: all of them when the caller asked for
  # categories, only the disambiguation ones when the language has any listed,
  # and none otherwise.
  def setup_categories(request, categories_requested)
    if categories_requested
      # Fetch all categories, include "hidden" flag to filter out internal
      request.prop(:categories).prop(:hidden)
    elsif !ambigous_categories.empty?
      # Fetch only "ambigous" categories, to tell ambigous pages out
      request.prop(:categories).categories(*ambigous_categories)
    else
      request
    end
  end
end
|
206
|
+
|
207
|
+
require_relative 'whatis/refinements'
|
208
|
+
require_relative 'whatis/thisis'
|
209
|
+
require_relative 'whatis/formatter'
|
data/lib/whatis/cli.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
class WhatIs
  # @private
  # Resolves the given titles through {WhatIs#these} and renders the result as a
  # single string in the requested output format.
  class CLI
    # Output formats {#run} accepts; each one is rendered by the private
    # `<name>_format` method below.
    FORMATS = %w[short long json].freeze

    # @param titles [Array<String>] titles to resolve
    # @param options [#language, #categories, #languages, #format] parsed command-line options
    def initialize(titles, options)
      @whatis = WhatIs[options.language]
      @titles = titles
      @options = {categories: options.categories, languages: options.languages}
      @format = options.format
    end

    # @return [String] resolved entities rendered in the chosen format
    # @raise [ArgumentError] if the format is not one of {FORMATS}
    def run
      # Validate before dispatching so a bad format is reported clearly instead of
      # surfacing as NoMethodError on a private method name.
      unless FORMATS.include?(@format.to_s)
        raise ArgumentError, "Unknown format #{@format.inspect}, expected one of: #{FORMATS.join(', ')}"
      end

      __send__("#{@format}_format", @whatis.these(*@titles, **@options))
    end

    private

    # One line per entity, produced by Formatter.
    def short_format(objects)
      formatter = Formatter.new
      objects.map { |title, o| formatter.call(title, o) }.join("\n")
    end

    # Per entity: blank line, title, dashed underline of the same length, description.
    def long_format(objects)
      objects.flat_map { |title, o|
        [
          '',
          title,
          '-' * title.length,
          o.describe(help: false)
        ]
      }.join("\n")
    end

    def json_format(objects)
      require 'json' # loaded lazily: only the json format needs it
      JSON.pretty_generate(objects)
    end
  end
end
|