ubi 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +11 -0
- data/Guardfile +16 -0
- data/MIT-LICENSE +19 -0
- data/README.md +57 -0
- data/Rakefile +9 -0
- data/bin/ubi +10 -0
- data/lib/ubi.rb +37 -0
- data/lib/ubi/aranea.rb +42 -0
- data/lib/ubi/artifex.rb +28 -0
- data/lib/ubi/consultor.rb +61 -0
- data/lib/ubi/consultores/bing.rb +26 -0
- data/lib/ubi/consultores/duckduckgo.rb +26 -0
- data/lib/ubi/consultores/facebook.rb +6 -0
- data/lib/ubi/consultores/foursquare.rb +6 -0
- data/lib/ubi/consultores/google.rb +24 -0
- data/lib/ubi/consultores/linkedin.rb +0 -0
- data/lib/ubi/consultores/twitter.rb +6 -0
- data/lib/ubi/consultores/wikipedia.rb +6 -0
- data/lib/ubi/consultores/yahoo.rb +26 -0
- data/lib/ubi/datum.rb +43 -0
- data/lib/ubi/impero.rb +20 -0
- data/lib/ubi/memoria.rb +72 -0
- data/lib/ubi/memorias/address.rb +71 -0
- data/lib/ubi/memorias/document.rb +50 -0
- data/lib/ubi/memorias/email.rb +19 -0
- data/lib/ubi/memorias/phone.rb +33 -0
- data/lib/ubi/memorias/site.rb +29 -0
- data/lib/ubi/memorias/social.rb +20 -0
- data/lib/ubi/memorias/who.rb +20 -0
- data/lib/ubi/thema.rb +62 -0
- data/lib/ubi/version.rb +4 -0
- data/spec/fixtures/email.txt +5 -0
- data/spec/fixtures/mobile.txt +17 -0
- data/spec/fixtures/page.txt +21 -0
- data/spec/fixtures/phone.txt +17 -0
- data/spec/fixtures/site.txt +21 -0
- data/spec/spec_helper.rb +40 -0
- data/spec/ubi/aranea_spec.rb +19 -0
- data/spec/ubi/artifex_spec.rb +4 -0
- data/spec/ubi/memorias/address_spec.rb +56 -0
- data/spec/ubi/memorias/document_spec.rb +48 -0
- data/spec/ubi/memorias/email_spec.rb +59 -0
- data/spec/ubi/memorias/phone_spec.rb +79 -0
- data/spec/ubi/memorias/site_spec.rb +82 -0
- data/spec/ubi/thema_spec.rb +33 -0
- data/ubi.gemspec +39 -0
- metadata +232 -0
data/lib/ubi/datum.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Ubi
  # Thin reader around a parsed HTML/XML document.
  #
  # `data` only has to answer `#xpath`; the object it returns must answer
  # `#text` and `#map` (presumably a Nokogiri document or node - confirm
  # against the code that builds Datum instances).
  class Datum
    attr_accessor :data, :words, :links

    # @param data  [#xpath] parsed document
    # @param words [String] XPath whose matched text is stored in #words
    # @param links [String] XPath whose matched nodes are stored in #links
    def initialize(data, words, links)
      @data = data
      @words = data.xpath(words).text
      # Every matched node contributes its `values` joined by a space.
      @links = data.xpath(links).map { |a| a.values.join(' ') }
    end

    # Delegates an XPath query to the wrapped document.
    def xpath(path)
      data.xpath(path)
    end

    # Text of every element named +div+ anywhere in the document.
    def read_div(div)
      data.xpath("//#{div}").text
    end

    # Maps each node matched by +list+ to a Struct with members +args+.
    #
    # @param list [String] XPath selecting the list items
    # @param args [Array<Symbol>] member names of the Struct rows
    # @return [Array<Struct>]
    def read_list(list, args = [])
      # Splat the member names, exactly as #read_table does; handing the
      # Array itself to Struct.new raises a TypeError.
      s = struct_for(*args)
      # NOTE(review): `i.xpath` is called without a path, so the values fed
      # to the struct depend on what the node returns for an empty query -
      # confirm the intended path.
      data.xpath(list).map { |i| s.new(*i.xpath) }
    end

    # Maps each row matched by +table+ to a Struct with members +args+,
    # filled with the normalized text of the row's `td` cells.
    #
    # @param table [String] XPath selecting the rows
    # @param args  [Array<Symbol>] member names of the Struct rows
    # @param subs  [String, Regexp] pattern removed from every cell
    # @return [Array<Struct>]
    def read_table(table, args = [], subs = '')
      s = struct_for(*args)
      data.xpath(table).map do |r|
        s.new(*r.xpath('td/text()').map { |t| normalize(t, subs) })
      end
    end

    # Removes +subs+ from +txt+ and trims the surrounding whitespace.
    def normalize(txt, subs = '')
      txt.to_s.gsub(subs, '').strip
    end

    private

    # Builds (and remembers in @struct) an anonymous Struct class for +keys+.
    # NOTE(review): an empty key list may be rejected by Struct.new depending
    # on the Ruby version - callers should pass at least one member name.
    def struct_for(*keys)
      @struct = Struct.new(*keys)
    end
  end
end
|
data/lib/ubi/impero.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'paint'
|
2
|
+
|
3
|
+
module Ubi
  # Impero: I command!
  #
  # Command line entry point, declared with the Thor DSL.
  class Impero < Thor
    class_option :verbose, type: :boolean, aliases: :v

    # desc 'init', 'creates settings on ~'
    desc 'find', 'Lookup for something'
    long_desc <<-LONG

    Find something based only on name

    LONG
    option :address, type: :string # 'Subject\'s address'
    # Builds an Artifex for +name+ and asks it for its spec.
    def find(name)
      artifex = Ubi::Artifex.new(name)
      artifex.spec
    end
  end
end
|
data/lib/ubi/memoria.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Memoria Base
    #
    # Common behaviour of every memoria (a kind of fact that can be pulled
    # out of text). Subclasses are registered in Ubi.memorias as soon as they
    # are defined and provide a class-level `regex` with one capture group.
    class Base
      include ActiveModel::Validations
      attr_accessor :value

      # @param value  [String] raw matched text
      # @param aranea [Object, nil] stored as-is in @aranea
      # @param thema  [Object, nil] stored as-is in @thema
      def initialize(value, aranea = nil, thema = nil)
        @value = value
        @aranea = aranea
        @thema = thema
      end

      # Format for #to_s
      def format
        value.downcase
      end

      def to_s
        format
      end

      class << self
        #
        # Account for memorias
        #
        # Registers every subclass in Ubi.memorias and refuses duplicates.
        #
        def inherited(base)
          # Keep the hook chain intact: ActiveModel::Validations installs its
          # own `inherited` that this singleton method would otherwise hide.
          super
          fail "Already defined #{base.key}" if Ubi.memorias.include?(base)
          puts "With memoria #{base}"
          Ubi.memorias << base
        end

        #
        # Text to be scanned, taken from +datum+.
        #
        # Accepts a String, an object exposing `data.text` (e.g. Ubi::Datum)
        # or any object answering `text`. Duck typing is used because
        # `Nokogiri::HTML` is a namespace module and never matches a parsed
        # document in a `case`.
        #
        def extract_text(datum)
          return datum if datum.is_a?(String)
          return datum.data.text if datum.respond_to?(:data)
          return datum.text if datum.respond_to?(:text)
          fail "Can't parse `#{datum.class}`"
        end

        #
        # Pattern used by .parse; subclasses override it.
        # Returning nil here makes .parse fail with a readable message.
        #
        def regex
          nil
        end

        #
        # Scans +datum+ with .regex and wraps every match in an instance.
        #
        def parse(datum)
          fail "Not implemented by #{self}" unless regex
          # `scan` yields Arrays when the regex has groups and Strings when
          # it has none; Array() lets both shapes work.
          extract_text(datum).scan(regex).map { |r| new(Array(r).first) }
        end

        #
        # Symbol key of the memoria (downcased class name, no namespace)
        #
        def key
          @key ||= to_s.split('::').last.downcase.to_sym
          # fail "Not implemented by #{self}"
        end

        #
        # Human-readable name of the memoria (class name, no namespace)
        #
        def name
          to_s.split('::').last
        end

        # Name used for collections of this memoria, "#{key}s" by default.
        def plural
          "#{key}s"
        end

        # Two memorias are the same when they share the same key.
        def ==(other)
          return false unless other.respond_to?(:key)
          key == other.key
        end
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Ubi
  module Memoria
    # An address in this world
    class Address < Base
      DIVIDERS = /[,\-\|\/]/
      # A divider only splits when at least one of its neighbours is not a
      # digit, so "123-456" stays together.
      SPLIT = /(?<=\D)#{DIVIDERS}|#{DIVIDERS}(?=\D)/
      REGEXES = {
        br: {
          prefix: %w( r rua av avenida pç pça praça pc pca praca tv travessa est estrada rod rodovia ),
          number: %w( n no nº num numero km ),
          ext: %w( comp obs ap apto apart apartamento andar ),
          zip: /\d{5}[-]\d{3}/
        },
        us: {
          prefix: %w( st street av avenue road ),
          zip: /\d{5}/
        }
      }.freeze

      attr_accessor :name, :parts, :words, :zip, :place, :number,
                    :city, :region, :nation, :extra

      #
      # Init: sanitizes the value, then splits it into parts and words
      #
      # @param val [String] raw address text
      # @param _location [Symbol] currently unused
      #
      def initialize(val, _location = :br)
        @value = Address.sanitize(val)
        # @zip = value.match(REGEXES[location][:zip])
        # @region = value.match(/\W([A-Z]{2})\W/)[1]
        # @number = value.match(/\w*\d+\w*/)

        @parts = value.split(SPLIT).map(&:strip)
        @words = parts.map { |pt| pt.split(/\s+/) }
      end

      #
      # Formatted address for +location+.
      #
      # .formats currently holds a placeholder template String rather than a
      # [pattern, replacement] pair; splatting it into String#sub raised an
      # ArgumentError (and so did #to_s). Without a usable pair the
      # sanitized value is returned unchanged.
      #
      def format(location = :br)
        pattern, replacement = self.class.formats[location]
        return value if replacement.nil?
        value.sub(pattern, replacement)
      end

      class << self
        #
        # Sanitizing
        #
        # whitespace runs    -> single space
        # literal "\n" text  -> "-"
        # " - " (any divider) -> "-"
        #
        # NOTE(review): real newlines are already collapsed to a space by
        # the first substitution; only a literal backslash + "n" becomes "-".
        #
        def sanitize(value)
          value.gsub(/\s+/, ' ').gsub(/\\n/, '-')
               .gsub(/\s?(#{DIVIDERS})\s?/, '\1')
        end

        # Output templates per location (placeholders are not expanded yet).
        def formats
          {
            # br: '%a, %n - %c %z %r',
            # br: '%a, %n - %c %z %r',
            br: '%a, %n - %c %z %r'
          }
        end

        # Everything from a known :br street prefix up to the end of line.
        # NOTE(review): prefixes are not word-bounded, so a lone "r" inside
        # any word starts a match - confirm whether that is intended.
        def regex
          /((?:#{REGEXES[:br][:prefix].join('|')}).*)/i
        end

        def plural
          :addresses
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Show me your papers!
    #
    # https://en.wikipedia.org/wiki/National_identification_number
    #
    # br: CNPJ
    # us: SSN
    # cl: RUN/RUT
    #
    class Document < Base
      #
      # Init, remove non word chars
      #
      def initialize(value)
        @value = value.gsub(/\W/, '')
      end

      #
      # Value with the punctuation of +location+ put back.
      # Unknown locations return the bare value instead of raising.
      #
      def format(location = :br)
        pattern, replacement = self.class.formats[location]
        return value if pattern.nil? || replacement.nil?
        value.sub(pattern, replacement)
      end

      #
      # Class methods
      #
      class << self
        # Detection patterns per location. Each one has a single capture
        # group because Base.parse reads the first group of every match.
        def regexes
          {
            br: /(\d{14}|\d{2}\.?\d{3}\.?\d{3}\/?\d{4}[-]?\d{2})/,
            cl: /(\d{2}\.\d{3}\.\d{3}[-][0-9kK])/,
            us: /(\d{3}[-]\d{2}[-]\d{4})/
          }
        end

        # [pattern, replacement] pairs used by #format.
        def formats
          {
            br: [/(\d{2})(\d{3})(\d{3})(\d{4})(\d{2})/, '\1.\2.\3/\4-\5'],
            # The RUN/RUT check digit may be "k"/"K", not only a digit.
            cl: [/(\d{2})(\d{3})(\d{3})([0-9kK])/, '\1.\2.\3-\4'],
            us: [/(\d{3})(\d{2})(\d{4})/, '\1-\2-\3']
          }
        end

        def regex(location = :br)
          regexes[location]
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ubi
  module Memoria
    # An Electronic Mail
    class Email < Base
      #
      # Class methods
      #
      class << self
        #
        # Email regex
        #
        # Case-insensitive: the inherited #format downcases the value
        # afterwards, so mixed-case addresses must be matched first.
        #
        def regex
          %r{([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)}i # rubocop:disable Metrics/LineLength
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Ubi
  module Memoria
    # A Phone! mobile? landline? who is calling???
    class Phone < Base
      attr_reader :number, :chunk

      # @param chunk [String] raw text of the phone number
      # @param hint  [Object, nil] passed to Phonelib.parse as its second
      #   argument (presumably a country hint - confirm against Phonelib)
      def initialize(chunk, hint = nil)
        @hint = hint
        @chunk = chunk
        parse_number
      end

      # Parses the digits of #chunk with Phonelib and stores the result.
      def parse_number
        @number = Phonelib.parse(chunk.gsub(/\D/, ''), @hint)
      end

      # National representation, as given by the parsed number.
      def to_s
        number && number.national
      end

      # International representation, as given by the parsed number.
      def rfc
        number && number.international
      end

      class << self
        # http://rubular.com/r/tEHB6KcZzk
        #
        # The surrounding whitespace is checked with lookarounds instead of
        # being consumed; otherwise `scan` eats the separator and misses a
        # second number that follows after a single space.
        def regex
          /(?<!\S)((?:\+\(?\d{1,3}\W)?[\._\-\/\s]*\(?\s*?\d{2,3}\s*?\)?[\._\-\/\s]*\d{3,5}[\._\-\/\s]*\d{4,5})(?!\S)/
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Ubi
  module Memoria
    # A site, url and title?
    class Site < Base
      #
      # Prefix http:// if there isn't one defined
      #
      # Only a scheme at the very start counts; a value that merely
      # contains "http" (e.g. "httpbin.org") still gets the prefix.
      #
      def format
        value =~ %r{\Ahttps?://}i ? value : "http://#{value}"
      end

      class << self
        #
        # Regex only for *.tld
        #
        # NOTE(review): the leading bracket expression is a character class,
        # not an optional "http(s)://www." group - kept as published.
        #
        def regex
          # %r{https?://((?:\w+[\./]?)+)(?:/|\.)}
          # (?:\??)[a-zA-Z0-9\-\._\?\,\'\/\\\+&%\$#\=~]+
          # %r{(?:https?\://)?(?:www\.)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}}
          # ((?:https\:\/\/)|(?:http\:\/\/)|(?:www\.))?([a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(?:\??)[a-zA-Z0-9\-\._\?\,\'\/\\\+&%\$#\=~]+)
          # http://www.regexr.com/3bkne
          /([(https?):\/\/(www\.)?a-zA-Z0-9@:%\._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&\/\/?=]*))/
        end

        def key
          :site
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Social account
    class Social < Base
      class << self
        # Known social applications and their host names.
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        # Matches http(s) URLs on any host listed in .apps (optionally under
        # a subdomain) and captures "host/path".
        #
        # The hosts come from .apps; the previous version interpolated an
        # undefined `url`, so calling .regex raised a NameError.
        def regex
          %r{https?://(?:\w+\.)*(#{Regexp.union(apps.values)}/.*)}
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Whoami
    class Who < Base
      class << self
        # Known applications and their host names.
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        # Matches http(s) URLs on any host listed in .apps (optionally under
        # a subdomain) and captures "host/path".
        #
        # The hosts come from .apps; the previous version interpolated an
        # undefined `url`, so calling .regex raised a NameError.
        def regex
          %r{https?://(?:\w+\.)*(#{Regexp.union(apps.values)}/.*)}
        end
      end
    end
  end
end
|
data/lib/ubi/thema.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Ubi
  #
  # Thema -> subject, matter, case
  #
  # Holds the name being looked up, the URLs to crawl and a cache with what
  # every registered memoria found so far.
  #
  class Thema
    include ActiveModel::AttributeMethods
    include ActiveModel::Serialization
    include ActiveModel::Dirty

    attr_accessor :name, :urls, :opts, :ascii, :clean

    # @param name [String] subject to look up
    # @param urls [Array] one Aranea is built per entry (see #araneas)
    # @param opts [Hash] initial values, keyed by memoria class
    def initialize(name, urls = [], opts = {})
      @name = name
      @urls = urls
      @opts = opts
      # The cache is keyed by memoria class and seeded from opts.
      @cache = Ubi.memorias.each_with_object({}) { |m, acc| acc[m] = opts[m] }
      reduce_names
    end

    # Lazily builds one Aranea per URL.
    def araneas
      @araneas ||= urls.map { |u| Aranea.new(self, u) }
    end

    # Pre-computes the downcased variants of the name.
    # NOTE(review): @ascii currently equals the downcased name; nothing is
    # transliterated here.
    def reduce_names
      @ascii = name.mb_chars.downcase
      @downcase = name.mb_chars.downcase
      @clean = @downcase.gsub(/\W/, ' ')
    end

    # One lazily initialized list reader per memoria known when this class
    # is loaded (e.g. #emails). `plural` may be a String or a Symbol, so the
    # instance variable name is interpolated rather than concatenated.
    Ubi.memorias.each do |memoria|
      ivar = "@#{memoria.plural}"
      define_method memoria.plural do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end
    end

    # Cached matches for a memoria, given its class or its key symbol.
    def [](arg)
      return @cache[arg] if @cache.key?(arg)
      memoria = Ubi.memorias.find { |m| m.key == arg }
      @cache[memoria] if memoria
    end

    # Prints the subject followed by every memoria name and its matches.
    def spec
      puts self
      Ubi.memorias.each do |memoria|
        print Paint[memoria.name, :black]
        # The cache is keyed by class; reading it by `memoria.key` always
        # printed an empty line.
        puts self[memoria]
      end
    end

    # Instantiates consultor class +a+ for this thema and lets every memoria
    # parse the consultor's datum, caching and printing the matches.
    def try_consultor(a)
      consultor = a.new(self)
      Ubi.memorias.each do |m|
        puts Paint["Trying to find #{m} in #{consultor.class}", :green]
        @cache[m] = matches = m.parse(consultor.datum)
        puts matches if matches && !matches.empty?
      end
    end

    def to_s
      name
    end
  end
end
|