whatis 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
class WhatIs
  using Refinements

  # @private
  # Renders a resolved entity as a single "<title>: <summary>" line.
  class Formatter
    # Chooses a summary renderer by the class of +object+ and prefixes the result with +title+.
    # An object that matches none of the known classes produces an empty summary.
    def call(title, object)
      summary =
        case object
        when ThisIs then format_thisis(object)
        when ThisIs::Ambigous then format_ambigous(object)
        when ThisIs::NotFound then format_notfound(object)
        end

      "#{title}: #{summary}"
    end

    private

    # Title, optional " {coordinates}" suffix, then a short description.
    def format_thisis(object)
      heading = object.title
      location = object.coordinates&.to_s&.surround(' {', '}')
      summary = short_description(object)

      [heading, location, ' - ', summary].join
    end

    # First available of: sorted categories, the only language link, description, extract.
    def short_description(obj) # rubocop:disable Metrics/AbcSize
      return obj.categories.sort.join('; ') if obj.categories.any?
      return obj.languages.values.first if obj.languages.count == 1
      return obj.description unless obj.description.to_s.empty?

      obj.extract
    end

    # Title, number of variants and the variants themselves joined with "; ".
    def format_ambigous(object)
      heading = object.title
      total = object.variants.count
      listing = object.variants.join('; ')

      "#{heading}, #{total} options - #{listing}"
    end

    # Constant text; the argument is ignored.
    def format_notfound(*)
      'not found'
    end
  end
end
@@ -0,0 +1,34 @@
1
class WhatIs
  # @private
  # Small helpers added to core classes; they are visible only in files that
  # activate them with `using Refinements`.
  module Refinements
    refine String do
      # Returns a new string: receiver followed by +after+.
      def append(after)
        "#{self}#{after}"
      end

      # Returns a new string: +before+ followed by receiver (receiver is not changed).
      def prepend(before)
        "#{before}#{self}"
      end

      # Returns a new string: receiver wrapped in +before+ and +after+
      # (+after+ defaults to +before+).
      def surround(before, after = before)
        "#{before}#{self}#{after}"
      end
    end

    refine Object do
      # Passes the receiver to the block and returns the block's result.
      def yield_self
        yield(self)
      end

      # Returns the receiver when the block is truthy for it, +nil+ otherwise.
      def iff
        self if yield(self)
      end
    end

    refine Hash do
      # Returns a new hash with every key replaced by the block's result for it.
      def transform_keys
        each_with_object({}) { |(key, val), renamed| renamed[yield(key)] = val }
      end
    end
  end
end
@@ -0,0 +1,181 @@
1
class WhatIs
  using Refinements

  # Represents one resolved entity, provides introspection and access to individual properties.
  # You should never create instances of this class directly, but rather obtain it from {WhatIs#this}
  # and {WhatIs#these}.
  #
  # @example
  #   paris = WhatIs.this('Paris')
  #   # => #<ThisIs Paris [img] {48.856700,2.350800}>
  #   paris.describe
  #   # => Paris
  #   # title: "Paris"
  #   # description: "capital city of France"
  #   # coordinates: #<Geo::Coord 48.856700,2.350800>
  #   # extract: "Paris (French pronunciation: ​[paʁi] ( listen)) is the capital and most populous city of France, with an administrative-limits area of 105 square kilometres (41 square miles) and a 2015 population of 2,229,621."
  #   # image: "https://upload.wikimedia.org/wikipedia/commons/0/08/Seine_and_Eiffel_Tower_from_Tour_Saint_Jacques_2013-08.JPG"
  #   #
  #   paris.coordinates
  #   # => #<Geo::Coord 48.856700,2.350800>
  #   paris2 = paris.what(languages: :ru, categories: true) # fetch more details
  #   # => #<ThisIs Paris/Париж, 12 categories [img] {48.856700,2.350800}>
  #   paris2.describe
  #   # => Paris
  #   # title: "Paris"
  #   # description: "capital city of France"
  #   # coordinates: #<Geo::Coord 48.856700,2.350800>
  #   # categories: ["3rd-century BC establishments", "Capitals in Europe", "Catholic pilgrimage sites", "Cities in France", "Cities in Île-de-France", "Companions of the Liberation", "Departments of Île-de-France", "European culture", "French culture", "Paris", "Populated places established in the 3rd century BC", "Prefectures in France"]
  #   # languages: {"ru"=>#<ThisIs::Link ru:Париж>}
  #   # extract: "Paris (French pronunciation: ​[paʁi] ( listen)) is the capital and most populous city of France, with an administrative-limits area of 105 square kilometres (41 square miles) and a 2015 population of 2,229,621."
  #   # image: "https://upload.wikimedia.org/wikipedia/commons/0/08/Seine_and_Eiffel_Tower_from_Tour_Saint_Jacques_2013-08.JPG"
  #   paris2.languages['ru'].resolve(categories: true)
  #   # => #<ThisIs Париж, 10 categories [img] {48.833333,2.333333}>
  #
  # See also:
  #
  # * {ThisIs::Ambigous} Representing disambiguation page, allows fetching variants.
  # * {ThisIs::NotFound} Representing not found entity, allows searching for possible options.
  class ThisIs
    # @private
    # One lambda per public attribute; each receives the fetched page and returns the attribute value.
    EXTRACTORS = {
      title: ->(page) { page.title },
      description: ->(page) { page.source.dig('terms', 'description', 0) },
      coordinates: ->(page) {
        coord = page.source['coordinates']&.first
        Geo::Coord.from_h(coord) if coord
      },
      categories: ->(page) {
        Array(page.source['categories'])
          .reject { |c| c.key?('hidden') }
          .map { |c| c['title'].split(':', 2).last } # drop everything up to the first ":"
      },
      languages: ->(page) {
        Array(page.source['langlinks'])
          .map { |l| [l['lang'], Link.new(l['*'], language: l['lang'])] }
          .to_h
      },
      extract: ->(page) {
        # remove HTML tags
        # NB: Wikipedia "extracts" submodule has "plaintext=true" option, but it produces wrong 1-sentence
        # extracts (broken by first ".", which can be somewhere in transcription of the main entity).
        # HTML extracts, on the other hand, return proper sentences
        #
        # Link: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bextracts
        page.source['extract']&.gsub(/<[^>]+>/, '')&.strip
      },
      image: ->(page) { page.source.dig('original', 'source') }
    }.freeze

    # @private
    # Wraps +page+ in the matching class: {NotFound} when there is no page, {Ambigous} when any
    # of the page's categories is listed in the owner's +ambigous_categories+, {ThisIs} otherwise.
    def self.create(owner, title, page)
      case
      when page.nil?
        NotFound.new(owner, title)
      when Array(page.source['categories']).any? { |c| owner.ambigous_categories.include?(c['title']) }
        Ambigous.new(owner, page)
      else
        new(owner, page)
      end
    end

    # Original [Infoboxer page](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki/Page) data.
    # @return [Infoboxer::MediaWiki::Page]
    attr_reader :page

    # @private
    def initialize(owner, page)
      @owner = owner
      @page = page
      # All attributes are extracted eagerly, once, at construction time.
      @data = EXTRACTORS.map { |sym, proc| [sym, proc.call(page)] }.to_h
    end

    # @!method title
    #   Title of Wikipedia page
    #   @return [String]
    # @!method description
    #   Short entity description phrase from Wikidata. Not always present.
    #   @return [String]
    # @!method extract
    #   First sentence of Wikipedia page
    #   @return [String]
    # @!method coordinates
    #   Geographical coordinates, associated with the page, if known, wrapped in
    #   [Geo::Coord](https://github.com/zverok/geo_coord) type.
    #   @return [Geo::Coord]
    # @!method image
    #   URL of page's main image, if known.
    #   @return [String]
    # @!method categories
    #   List of page's categories, present only if page was fetched with `categories: true` option.
    #   @return [Array<String>]
    # @!method languages
    #   Hash of other language version of page. Present only if the page was fetched with `:languages`
    #   option. Keys are language codes, values are {ThisIs::Link} objects, allowing to fetch corresponding
    #   entities with {ThisIs::Link#resolve}.
    #   @return [Hash{String => ThisIs::Link}]

    EXTRACTORS.each_key { |name| define_method(name) { @data[name] } }

    alias to_s title

    # @return [String]
    def inspect # rubocop:disable Metrics/AbcSize
      [
        'ThisIs ',
        title,
        languages.iff { |l| l.count == 1 }&.yield_self { |l| l.values.first.title.prepend('/') },
        languages.iff { |l| l.count > 1 }&.yield_self { |l| " +#{l.count} translations" },
        categories.iff(&:any?)&.yield_self { |c| ", #{c.count} categories" },
        image&.yield_self { ' [img]' },
        coordinates&.to_s&.surround(' {', '}')
      ].compact.join.surround('#<', '>')
    end

    # Multi-line listing of all non-empty attributes, keys right-aligned to the longest attribute name.
    #
    # @return [Description]
    def describe(*)
      maxlength = @data.keys.map(&:length).max
      Description.new(
        "#{self}\n" +
          clean_data
            .map { |k, v| " #{k.to_s.rjust(maxlength)}: #{v.inspect}" }.join("\n")
      )
    end

    # Plain-data representation: coordinates and language links are converted to strings,
    # nil and empty values are dropped.
    #
    # @return [Hash]
    def to_h
      {type: 'ThisIs'} # To be at the beginning of a hash
        .merge(@data)
        .merge(
          coordinates: coordinates&.to_s,
          languages: languages.transform_values(&:to_s)
        ).reject { |_, v| blank?(v) }
    end

    # Serializes {#to_h}. Arguments are optional and passed through to +Hash#to_json+, so both a
    # plain `to_json` call and a call made by a JSON generator (which passes its state) work.
    #
    # @return [String]
    def to_json(*opts)
      to_h.to_json(*opts)
    end

    # Refetch page with more data, see {WhatIs#this} for options explanation. Returns new object.
    #
    # @param options [Hash]
    # @option options [true, String, Symbol] :languages
    # @option options [true, false] :categories
    # @return [ThisIs]
    def what(**options)
      @owner.this(title, **options)
    end

    private

    # Extracted attributes without nil/empty values.
    def clean_data
      @data.reject { |_, v| blank?(v) }
    end

    # True for nil and for anything that responds to +empty?+ and is empty.
    def blank?(value)
      value.nil? || (value.respond_to?(:empty?) && value.empty?)
    end
  end
end
178
+
179
+ require_relative 'thisis/ambigous'
180
+ require_relative 'thisis/notfound'
181
+ require_relative 'thisis/link'
@@ -0,0 +1,128 @@
1
class WhatIs
  class ThisIs
    # Represents disambiguation page.
    #
    # You should never create instances of this class directly, but rather obtain it from {WhatIs#this}
    # and {WhatIs#these}.
    #
    # `Ambigous` consists of {#variants}, each of them represented by a {ThisIs::Link} which can
    # be {ThisIs::Link#resolve resolved}.
    #
    # @note This functionality (special wrapper for disambiguation
    #   pages) works only for those language Wikis which have their "disambiguation" category known
    #   to `WhatIs`. See {WhatIs::AMBIGOUS_CATEGORIES}.
    #
    # @example
    #   a = WhatIs.this('Bela Crkva')
    #   # => #<ThisIs::Ambigous Bela Crkva (6 options)>
    #   a.describe
    #   # => Bela Crkva: ambigous (6 options)
    #   # #<ThisIs::Link Bela Crkva, Banat>: Bela Crkva, Banat, a town in Vojvodina, Serbia
    #   # #<ThisIs::Link Bela Crkva, Krivogaštani>: Bela Crkva, Krivogaštani, a village in the Municipality of Krivogaštani, Macedonia
    #   # #<ThisIs::Link Bela Crkva (Krupanj)>: Bela Crkva (Krupanj), a village in the Mačva District of Serbia
    #   # #<ThisIs::Link Toplička Bela Crkva>: Toplička Bela Crkva, original name of the city of Kuršumlija, Serbia
    #   # #<ThisIs::Link See also/Bila Tserkva>: Bila Tserkva (Біла Церква), a city in the Kiev Oblast of Ukraine
    #   # #<ThisIs::Link See also/Byala Cherkva>: Byala Cherkva, a town in the Veliko Turnovo oblast of Bulgaria
    #   #
    #   # Usage: .variants[0].resolve, .resolve_all
    #
    #   a.variants[0]
    #   # => #<ThisIs::Link Bela Crkva, Banat>
    #   a.variants[0].resolve
    #   # => #<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>
    #   a.variants[0].resolve(categories: true)
    #   # => #<ThisIs Bela Crkva, Banat, 5 categories [img] {44.897500,21.416944}>
    #   a.resolve_all
    #   # => {"Bela Crkva, Banat"=>#<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>, "Bela Crkva, Krivogaštani"=>#<ThisIs Bela Crkva, Krivogaštani {41.280833,21.345278}>, "Bela Crkva (Krupanj)"=>#<ThisIs Bela Crkva (Krupanj) [img] {44.395000,19.479400}>, "Toplička Bela Crkva"=>#<ThisIs Kuršumlija [img] {43.150000,21.266667}>, "Bila Tserkva"=>#<ThisIs Bila Tserkva [img] {49.798889,30.115278}>, "Byala Cherkva"=>#<ThisIs Byala Cherkva [img] {43.200000,25.300000}>}
    #
    class Ambigous
      # Original [Infoboxer page](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki/Page) data.
      # @return [Infoboxer::MediaWiki::Page]
      attr_reader :page

      # Each link can be {ThisIs::Link#resolve resolved} individually, like
      # `ambigous.variants[0].resolve`, or you can resolve them all at once with {#resolve_all}.
      #
      # @return [Array<ThisIs::Link>]
      attr_reader :variants

      # @private
      def initialize(owner, page)
        @owner = owner
        @page = page
        # Variants are parsed from the page once, at construction time.
        @variants = extract_variants
      end

      # @return [String]
      def title
        page.title
      end

      # @return [String]
      def inspect
        "#<ThisIs::Ambigous #{title} (#{variants.count} options)>"
      end

      # @return [String]
      def to_s
        "#{title}: ambigous (#{variants.count} options)"
      end

      # @return [Hash]
      def to_h
        {
          type: 'ThisIs::Ambigous',
          title: title,
          variants: variants.map(&:to_s)
        }
      end

      # Serializes {#to_h}. Arguments are optional and passed through to +Hash#to_json+, so both a
      # plain `to_json` call and a call made by a JSON generator (which passes its state) work.
      #
      # @return [String]
      def to_json(*opts)
        to_h.to_json(*opts)
      end

      # Multi-line listing of all variants with their descriptions.
      #
      # @param help [true, false] whether to append the usage hint.
      # @return [Description]
      def describe(help: true)
        Description.new(
          "#{self}\n" +
            variants.map { |link| " #{link.inspect}: #{link.description}" }.join("\n") +
            describe_help(help)
        )
      end

      # Resolves all ambigous variants with one query.
      # See {WhatIs#this} for options explanation.
      #
      # @param options [Hash]
      # @option options [true, String, Symbol] :languages
      # @option options [true, false] :categories
      # @return [Hash{String => ThisIs}]
      def resolve_all(**options)
        @owner.these(*variants.map(&:title), **options)
      end

      private

      # Usage hint appended to {#describe} output; empty string when +render+ is falsy.
      def describe_help(render = true)
        return '' unless render

        "\n\n Usage: .variants[0].resolve, .resolve_all"
      end

      # Every list item of the page that contains at least one wikilink becomes a variant.
      def extract_variants
        page.wikipath('//ListItem')
            .reject { |item| item.wikilinks.empty? }
            .map { |item| item_to_link(item) }
      end

      # Builds a link from the item's first wikilink; section path and the item's text are kept
      # for display.
      def item_to_link(item)
        Link.new(
          item.wikilinks.first.link,
          owner: @owner,
          section: item.in_sections.map(&:heading).map(&:text_).reverse.reject(&:empty?).join('/'),
          description: item.children.map(&:text).join
        )
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
class WhatIs
  using Refinements

  class ThisIs
    # A reference to an entity that has not been fetched yet; call {#resolve} to fetch it.
    #
    # You should never create instances of this class directly, it occurs as variant links from
    # {Ambigous}, and as {ThisIs#languages} links.
    #
    # @example
    #   # Ambigous variants link
    #   a = WhatIs.this('Bela Crkva')
    #   # => #<ThisIs::Ambigous Bela Crkva (6 options)>
    #   a.variants[0]
    #   # => #<ThisIs::Link Bela Crkva, Banat>
    #   a.variants[0].resolve
    #   # => #<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>
    #
    #   # Languages link
    #   paris = WhatIs.this('Paris', languages: :ru)
    #   # => #<ThisIs Paris/Париж, [img] {48.856700,2.350800}>
    #   paris.languages
    #   # => {"ru"=>#<ThisIs::Link ru:Париж>}
    #   paris.languages['ru'].resolve(categories: true)
    #   # => #<ThisIs Париж, 10 categories [img] {48.833333,2.333333}>
    #
    class Link
      # @return [String]
      attr_reader :title
      # @private
      attr_reader :language

      # @private
      # For pretty output only
      attr_reader :section, :description

      # @private
      def initialize(title, section: nil, owner: nil, language: nil, description: nil)
        @title = title
        @owner = owner
        @description = description
        @language = language&.to_s
        # An empty section string is treated as "no section": the variable is left unassigned.
        @section = section unless section == ''
      end

      # Language (followed by ":") and section (followed by "/") are shown only when present.
      #
      # @return [String]
      def inspect
        language_prefix = language&.append(':')
        section_prefix = section&.append('/')

        "#<ThisIs::Link #{language_prefix}#{section_prefix}#{title}>"
      end

      alias to_s title

      # Resolves the link, fetching entity from Wikipedia API. The owner given at construction is
      # used when present, otherwise the engine looked up by the link's language.
      #
      # See {WhatIs#this} for options explanation.
      #
      # @param options [Hash]
      # @option options [true, String, Symbol] :languages
      # @option options [true, false] :categories
      # @return [ThisIs, ThisIs::Ambigous]
      # @raise [RuntimeError] when the link has neither an owner nor a language to resolve with.
      def resolve(**options)
        engine = @owner || (language && WhatIs[language])
        raise "Can't resolve #{inspect}" unless engine

        engine.this(title, **options)
      end

      # @private
      # For tests only
      def ==(other)
        return false unless other.is_a?(Link)

        other.language == language && other.title == title
      end
    end
  end
end