whatis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ class WhatIs
2
+ using Refinements
3
+
4
# Turns a resolved entity into a one-line, human-readable "title: summary" string.
#
# @private
class Formatter
  # @param title [String] label printed before the colon (typically the requested name)
  # @param object [ThisIs, ThisIs::Ambigous, ThisIs::NotFound] resolved entity
  # @return [String] "<title>: <summary>"; the summary is empty for any other object
  def call(title, object)
    "#{title}: #{summarize(object)}"
  end

  private

  # Picks the formatter matching the entity's class; yields nil for unknown objects.
  def summarize(object)
    case object
    when ThisIs then format_thisis(object)
    when ThisIs::Ambigous then format_ambigous(object)
    when ThisIs::NotFound then format_notfound(object)
    end
  end

  # "Title {coordinates} - short description"; the braces part is omitted without coordinates.
  def format_thisis(object)
    coords = object.coordinates&.to_s
    location = coords && " {#{coords}}"
    [object.title, location, ' - ', short_description(object)].join
  end

  # The most specific summary available, in order of preference:
  # categories, the only translation, the description, the extract.
  def short_description(obj) # rubocop:disable Metrics/AbcSize
    return obj.categories.sort.join('; ') if obj.categories.any?
    return obj.languages.values.first if obj.languages.count == 1
    return obj.extract if obj.description.to_s.empty?

    obj.description
  end

  # "Title, N options - variant; variant; ..."
  def format_ambigous(object)
    options = object.variants
    "#{object.title}, #{options.count} options - #{options.join('; ')}"
  end

  # Accepts (and ignores) any arguments so it can be called like the other formatters.
  def format_notfound(*_ignored)
    'not found'
  end
end
51
+ end
@@ -0,0 +1,34 @@
1
+ class WhatIs
2
+ # @private
3
+ module Refinements
4
+ refine String do
5
+ def append(after)
6
+ "#{self}#{after}"
7
+ end
8
+
9
+ def prepend(before)
10
+ "#{before}#{self}"
11
+ end
12
+
13
+ def surround(before, after = before)
14
+ "#{before}#{self}#{after}"
15
+ end
16
+ end
17
+
18
+ refine Object do
19
+ def yield_self
20
+ yield self
21
+ end
22
+
23
+ def iff
24
+ yield(self) ? self : nil
25
+ end
26
+ end
27
+
28
+ refine Hash do
29
+ def transform_keys
30
+ map { |key, val| [yield(key), val] }.to_h
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,181 @@
1
+ class WhatIs
2
+ using Refinements
3
+
4
+ # Represents one resolved entity, provides introspection and access to individual properties.
5
+ # You should never create instances of this class directly, but rather obtain it from {WhatIs#this}
6
+ # and {WhatIs#these}.
7
+ #
8
+ # @example
9
+ # paris = WhatIs.this('Paris')
10
+ # # => #<ThisIs Paris [img] {48.856700,2.350800}>
11
+ # paris.describe
12
+ # # => Paris
13
+ # # title: "Paris"
14
+ # # description: "capital city of France"
15
+ # # coordinates: #<Geo::Coord 48.856700,2.350800>
16
+ # # extract: "Paris (French pronunciation: ​[paʁi] ( listen)) is the capital and most populous city of France, with an administrative-limits area of 105 square kilometres (41 square miles) and a 2015 population of 2,229,621."
17
+ # # image: "https://upload.wikimedia.org/wikipedia/commons/0/08/Seine_and_Eiffel_Tower_from_Tour_Saint_Jacques_2013-08.JPG"
18
+ # #
19
+ # paris.coordinates
20
+ # # => #<Geo::Coord 48.856700,2.350800>
21
+ # paris2 = paris.what(languages: :ru, categories: true) # fetch more details
22
+ # # => #<ThisIs Paris/Париж, 12 categories [img] {48.856700,2.350800}>
23
+ # paris2.describe
24
+ # # => Paris
25
+ # # title: "Paris"
26
+ # # description: "capital city of France"
27
+ # # coordinates: #<Geo::Coord 48.856700,2.350800>
28
+ # # categories: ["3rd-century BC establishments", "Capitals in Europe", "Catholic pilgrimage sites", "Cities in France", "Cities in Île-de-France", "Companions of the Liberation", "Departments of Île-de-France", "European culture", "French culture", "Paris", "Populated places established in the 3rd century BC", "Prefectures in France"]
29
+ # # languages: {"ru"=>#<ThisIs::Link ru:Париж>}
30
+ # # extract: "Paris (French pronunciation: ​[paʁi] ( listen)) is the capital and most populous city of France, with an administrative-limits area of 105 square kilometres (41 square miles) and a 2015 population of 2,229,621."
31
+ # # image: "https://upload.wikimedia.org/wikipedia/commons/0/08/Seine_and_Eiffel_Tower_from_Tour_Saint_Jacques_2013-08.JPG"
32
+ # paris2.languages['ru'].resolve(categories: true)
33
+ # # => #<ThisIs Париж, 10 categories [img] {48.833333,2.333333}>
34
+ #
35
+ # See also:
36
+ #
37
+ # * {ThisIs::Ambigous} Representing disambiguation page, allows fetching variants.
38
+ # * {ThisIs::NotFound} Representing not found entity, allows searching for possible options.
39
class ThisIs
  # Maps each public attribute name to a lambda that pulls its value out of the
  # Infoboxer page. {#initialize} runs every extractor once and stores the results;
  # the keys also become reader methods (see `define_method` below).
  #
  # @private
  EXTRACTORS = {
    title: ->(page) { page.title },
    description: ->(page) { page.source.dig('terms', 'description', 0) },
    coordinates: ->(page) {
      coord = page.source['coordinates']&.first
      Geo::Coord.from_h(coord) if coord
    },
    categories: ->(page) {
      # Hidden categories are skipped; everything up to the first ":" of the title is dropped.
      Array(page.source['categories'])
        .reject { |c| c.key?('hidden') }
        .map { |c| c['title'].split(':', 2).last }
    },
    languages: ->(page) {
      Array(page.source['langlinks'])
        .map { |l| [l['lang'], Link.new(l['*'], language: l['lang'])] }
        .to_h
    },
    extract: ->(page) {
      # remove HTML tags
      # NB: Wikipedia "extracts" submodule has "plaintext=true" option, but it produces wrong 1-sentence
      # extracts (broken by first ".", which can be somewhere in transcription of the main entity).
      # HTML extracts, on the other hand, return proper sentences
      #
      # Link: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bextracts
      page.source['extract']&.gsub(/<[^>]+>/, '')&.strip
    },
    image: ->(page) { page.source.dig('original', 'source') }
  }.freeze

  # Wraps a fetched page into the proper result class: {NotFound} when there is no page,
  # {Ambigous} when one of the page's categories is among the owner's `ambigous_categories`,
  # and {ThisIs} otherwise.
  #
  # @private
  def self.create(owner, title, page)
    case
    when page.nil?
      NotFound.new(owner, title)
    when Array(page.source['categories']).any? { |c| owner.ambigous_categories.include?(c['title']) }
      Ambigous.new(owner, page)
    else
      new(owner, page)
    end
  end

  # Original [Infoboxer page](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki/Page) data.
  # @return [Infoboxer::MediaWiki::Page]
  attr_reader :page

  # @private
  def initialize(owner, page)
    @owner = owner
    @page = page
    @data = EXTRACTORS.map { |sym, proc| [sym, proc.call(page)] }.to_h
  end

  # @!method title
  #   Title of Wikipedia page
  #   @return [String]
  # @!method description
  #   Short entity description phrase from Wikidata. Not always present.
  #   @return [String]
  # @!method extract
  #   First sentence of Wikipedia page
  #   @return [String]
  # @!method coordinates
  #   Geographical coordinates, associated with the page, if known, wrapped in
  #   [Geo::Coord](https://github.com/zverok/geo_coord) type.
  #   @return [Geo::Coord]
  # @!method image
  #   URL of page's main image, if known.
  #   @return [String]
  # @!method categories
  #   List of page's categories, present only if page was fetched with `categories: true` option.
  #   @return [Array<String>]
  # @!method languages
  #   Hash of other language versions of page. Present only if the page was fetched with `:languages`
  #   option. Keys are language codes, values are {ThisIs::Link} objects, allowing to fetch corresponding
  #   entities with {ThisIs::Link#resolve}.
  #   @return [Hash{String => ThisIs::Link}]

  EXTRACTORS.each_key { |title| define_method(title) { @data[title] } }

  alias to_s title

  # Compact one-line summary: title, translation info, category count, image marker, coordinates.
  # Relies on the `iff`, `yield_self`, `prepend` and `surround` refinements.
  #
  # @return [String]
  def inspect # rubocop:disable Metrics/AbcSize
    [
      'ThisIs ',
      title,
      languages.iff { |l| l.count == 1 }&.yield_self { |l| l.values.first.title.prepend('/') },
      languages.iff { |l| l.count > 1 }&.yield_self { |l| " +#{l.count} translations" },
      categories.iff(&:any?)&.yield_self { |c| ", #{c.count} categories" },
      image&.yield_self { ' [img]' },
      coordinates&.to_s&.surround(' {', '}')
    ].compact.join.surround('#<', '>')
  end

  # Multi-line listing of all non-empty attributes. Arguments are accepted and ignored.
  #
  # @return [Description]
  def describe(*)
    # Width is taken from all attribute names, so alignment does not depend on which are present.
    maxlength = @data.keys.map(&:length).max
    Description.new(
      "#{self}\n" +
      clean_data
        .map { |k, v| " #{k.to_s.rjust(maxlength)}: #{v.inspect}" }.join("\n")
    )
  end

  # Plain-data representation: coordinates and language links are converted to strings,
  # nil and empty values are dropped.
  #
  # @return [Hash]
  def to_h
    {type: 'ThisIs'} # To be at the beginning of a hash
      .merge(@data)
      .merge(
        coordinates: coordinates&.to_s,
        languages: languages.transform_values(&:to_s)
      ).reject { |_, v| v.nil? || v.respond_to?(:empty?) && v.empty? }
  end

  # Serializes {#to_h}. Arguments are optional, so both a direct `obj.to_json` and calls that
  # pass generator options or state work.
  #
  # @param opts [Array] optional arguments forwarded to `Hash#to_json`
  # @return [String]
  def to_json(*opts)
    to_h.to_json(*opts)
  end

  # Refetch page with more data, see {WhatIs#this} for options explanation. Returns new object.
  #
  # @param options [Hash]
  # @option options [true, String, Symbol] :languages
  # @option options [true, false] :categories
  # @return [ThisIs]
  def what(**options)
    @owner.this(title, **options)
  end

  private

  # Extracted attributes without nil or empty values.
  def clean_data
    @data.reject { |_, v| v.nil? || v.respond_to?(:empty?) && v.empty? }
  end
end
177
+ end
178
+
179
+ require_relative 'thisis/ambigous'
180
+ require_relative 'thisis/notfound'
181
+ require_relative 'thisis/link'
@@ -0,0 +1,128 @@
1
+ class WhatIs
2
+ class ThisIs
3
+ # Represents disambiguation page.
4
+ #
5
+ # You should never create instances of this class directly, but rather obtain it from {WhatIs#this}
6
+ # and {WhatIs#these}.
7
+ #
8
+ # `Ambigous` consists of {#variants}, each of them represented by a {ThisIs::Link} which can
9
+ # be {ThisIs::Link#resolve resolved}.
10
+ #
11
+ # @note This functionality (special wrapper for disambiguation
12
+ # pages) works only for those language Wikis which have their "disambiguation" category known
13
+ # to `WhatIs`. See {WhatIs::AMBIGOUS_CATEGORIES}.
14
+ #
15
+ # @example
16
+ # a = WhatIs.this('Bela Crkva')
17
+ # # => #<ThisIs::Ambigous Bela Crkva (6 options)>
18
+ # a.describe
19
+ # # => Bela Crkva: ambigous (6 options)
20
+ # # #<ThisIs::Link Bela Crkva, Banat>: Bela Crkva, Banat, a town in Vojvodina, Serbia
21
+ # # #<ThisIs::Link Bela Crkva, Krivogaštani>: Bela Crkva, Krivogaštani, a village in the Municipality of Krivogaštani, Macedonia
22
+ # # #<ThisIs::Link Bela Crkva (Krupanj)>: Bela Crkva (Krupanj), a village in the Mačva District of Serbia
23
+ # # #<ThisIs::Link Toplička Bela Crkva>: Toplička Bela Crkva, original name of the city of Kuršumlija, Serbia
24
+ # # #<ThisIs::Link See also/Bila Tserkva>: Bila Tserkva (Біла Церква), a city in the Kiev Oblast of Ukraine
25
+ # # #<ThisIs::Link See also/Byala Cherkva>: Byala Cherkva, a town in the Veliko Turnovo oblast of Bulgaria
26
+ # #
27
+ # # Usage: .variants[0].resolve, .resolve_all
28
+ #
29
+ # a.variants[0]
30
+ # # => #<ThisIs::Link Bela Crkva, Banat>
31
+ # a.variants[0].resolve
32
+ # # => #<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>
33
+ # a.variants[0].resolve(categories: true)
34
+ # # => #<ThisIs Bela Crkva, Banat, 5 categories [img] {44.897500,21.416944}>
35
+ # a.resolve_all
36
+ # # => {"Bela Crkva, Banat"=>#<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>, "Bela Crkva, Krivogaštani"=>#<ThisIs Bela Crkva, Krivogaštani {41.280833,21.345278}>, "Bela Crkva (Krupanj)"=>#<ThisIs Bela Crkva (Krupanj) [img] {44.395000,19.479400}>, "Toplička Bela Crkva"=>#<ThisIs Kuršumlija [img] {43.150000,21.266667}>, "Bila Tserkva"=>#<ThisIs Bila Tserkva [img] {49.798889,30.115278}>, "Byala Cherkva"=>#<ThisIs Byala Cherkva [img] {43.200000,25.300000}>}
37
+ #
38
+ class Ambigous
39
+ # Original [Infoboxer page](http://www.rubydoc.info/gems/infoboxer/Infoboxer/MediaWiki/Page) data.
40
+ # @return [Infoboxer::MediaWiki::Page]
41
+ attr_reader :page
42
+
43
+ # Each link can be {ThisIs::Link#resolve resolved} individually, like
44
+ # `ambigous.variants[0].resolve`, or you can resolve them all at once with {#resolve_all}.
45
+ #
46
+ # @return [Array<ThisIs::Link>]
47
+ attr_reader :variants
48
+
49
+ # @private
50
+ def initialize(owner, page)
51
+ @owner = owner
52
+ @page = page
53
+ @variants = extract_variants
54
+ end
55
+
56
+ # @return [String]
57
+ def title
58
+ page.title
59
+ end
60
+
61
+ # @return [String]
62
+ def inspect
63
+ "#<ThisIs::Ambigous #{title} (#{variants.count} options)>"
64
+ end
65
+
66
+ # @return [String]
67
+ def to_s
68
+ "#{title}: ambigous (#{variants.count} options)"
69
+ end
70
+
71
+ # @return [Hash]
72
+ def to_h
73
+ {
74
+ type: 'ThisIs::Ambigous',
75
+ title: title,
76
+ variants: variants.map(&:to_s)
77
+ }
78
+ end
79
+
80
+ # @return [String]
81
+ def to_json(opts)
82
+ to_h.to_json(opts)
83
+ end
84
+
85
+ # @return [Description]
86
+ def describe(help: true)
87
+ Description.new(
88
+ "#{self}\n" +
89
+ variants.map { |link| " #{link.inspect}: #{link.description}" }.join("\n") +
90
+ describe_help(help)
91
+ )
92
+ end
93
+
94
+ # Resolves all ambigous variants with one query.
95
+ # See {WhatIs#this} for options explanation.
96
+ #
97
+ # @param options [Hash]
98
+ # @option options [true, String, Symbol] :languages
99
+ # @option options [true, false] :categories
100
+ # @return [Hash{String => ThisIs}]
101
+ def resolve_all(**options)
102
+ @owner.these(*variants.map(&:title), **options)
103
+ end
104
+
105
+ private
106
+
107
+ def describe_help(render = true)
108
+ return '' unless render
109
+ "\n\n Usage: .variants[0].resolve, .resolve_all"
110
+ end
111
+
112
+ def extract_variants
113
+ page.wikipath('//ListItem')
114
+ .reject { |item| item.wikilinks.empty? }
115
+ .map(&method(:item_to_link))
116
+ end
117
+
118
+ def item_to_link(item)
119
+ Link.new(
120
+ item.wikilinks.first.link,
121
+ owner: @owner,
122
+ section: item.in_sections.map(&:heading).map(&:text_).reverse.reject(&:empty?).join('/'),
123
+ description: item.children.map(&:text).join
124
+ )
125
+ end
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,75 @@
1
+ class WhatIs
2
+ using Refinements
3
+
4
+ class ThisIs
5
+ # Represents link to some entity that can be resolved to proper entity definition.
6
+ #
7
+ # You should never create instances of this class directly, it occurs as variant links from
8
+ # {Ambigous}, and as {ThisIs#languages} links.
9
+ #
10
+ # @example
11
+ # # Ambigous variants link
12
+ # a = WhatIs.this('Bela Crkva')
13
+ # # => #<ThisIs::Ambigous Bela Crkva (6 options)>
14
+ # a.variants[0]
15
+ # # => #<ThisIs::Link Bela Crkva, Banat>
16
+ # a.variants[0].resolve
17
+ # # => #<ThisIs Bela Crkva, Banat [img] {44.897500,21.416944}>
18
+ #
19
+ # # Languages link
20
+ # paris = WhatIs.this('Paris', languages: :ru)
21
+ # # => #<ThisIs Paris/Париж [img] {48.856700,2.350800}>
22
+ # paris.languages
23
+ # # => {"ru"=>#<ThisIs::Link ru:Париж>}
24
+ # paris.languages['ru'].resolve(categories: true)
25
+ # # => #<ThisIs Париж, 10 categories [img] {48.833333,2.333333}>
26
+ #
27
class Link
  # @return [String]
  attr_reader :title

  # @private
  attr_reader :language

  # @private
  # For pretty output only
  attr_reader :section, :description

  # @private
  def initialize(title, section: nil, owner: nil, language: nil, description: nil)
    @title = title
    @owner = owner
    @description = description
    # Language codes may arrive as symbols; they are stored as strings.
    @language = language.nil? ? nil : language.to_s
    # An empty section is treated as no section at all.
    @section = section == '' ? nil : section
  end

  # Shows the title, preceded by "language:" and "section/" when those are known.
  #
  # @return [String]
  def inspect
    language_prefix = "#{language}:" if language
    section_prefix = "#{section}/" if section
    "#<ThisIs::Link #{language_prefix}#{section_prefix}#{title}>"
  end

  alias_method :to_s, :title

  # Resolves the link, fetching entity from Wikipedia API.
  #
  # The owner passed at construction is used when present; otherwise the engine is looked up
  # by the link's language. See {WhatIs#this} for options explanation.
  #
  # @param options [Hash]
  # @option options [true, String, Symbol] :languages
  # @option options [true, false] :categories
  # @return [ThisIs, ThisIs::Ambigous]
  # @raise [RuntimeError] when the link has neither an owner nor a language
  def resolve(**options)
    engine = @owner || (language && WhatIs[language])
    raise "Can't resolve #{inspect}" unless engine

    engine.this(title, **options)
  end

  # @private
  # For tests only
  def ==(other)
    return false unless other.is_a?(Link)

    other.language == language && other.title == title
  end
end
74
+ end
75
+ end