ubi 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +11 -0
- data/Guardfile +16 -0
- data/MIT-LICENSE +19 -0
- data/README.md +57 -0
- data/Rakefile +9 -0
- data/bin/ubi +10 -0
- data/lib/ubi.rb +37 -0
- data/lib/ubi/aranea.rb +42 -0
- data/lib/ubi/artifex.rb +28 -0
- data/lib/ubi/consultor.rb +61 -0
- data/lib/ubi/consultores/bing.rb +26 -0
- data/lib/ubi/consultores/duckduckgo.rb +26 -0
- data/lib/ubi/consultores/facebook.rb +6 -0
- data/lib/ubi/consultores/foursquare.rb +6 -0
- data/lib/ubi/consultores/google.rb +24 -0
- data/lib/ubi/consultores/linkedin.rb +0 -0
- data/lib/ubi/consultores/twitter.rb +6 -0
- data/lib/ubi/consultores/wikipedia.rb +6 -0
- data/lib/ubi/consultores/yahoo.rb +26 -0
- data/lib/ubi/datum.rb +43 -0
- data/lib/ubi/impero.rb +20 -0
- data/lib/ubi/memoria.rb +72 -0
- data/lib/ubi/memorias/address.rb +71 -0
- data/lib/ubi/memorias/document.rb +50 -0
- data/lib/ubi/memorias/email.rb +19 -0
- data/lib/ubi/memorias/phone.rb +33 -0
- data/lib/ubi/memorias/site.rb +29 -0
- data/lib/ubi/memorias/social.rb +20 -0
- data/lib/ubi/memorias/who.rb +20 -0
- data/lib/ubi/thema.rb +62 -0
- data/lib/ubi/version.rb +4 -0
- data/spec/fixtures/email.txt +5 -0
- data/spec/fixtures/mobile.txt +17 -0
- data/spec/fixtures/page.txt +21 -0
- data/spec/fixtures/phone.txt +17 -0
- data/spec/fixtures/site.txt +21 -0
- data/spec/spec_helper.rb +40 -0
- data/spec/ubi/aranea_spec.rb +19 -0
- data/spec/ubi/artifex_spec.rb +4 -0
- data/spec/ubi/memorias/address_spec.rb +56 -0
- data/spec/ubi/memorias/document_spec.rb +48 -0
- data/spec/ubi/memorias/email_spec.rb +59 -0
- data/spec/ubi/memorias/phone_spec.rb +79 -0
- data/spec/ubi/memorias/site_spec.rb +82 -0
- data/spec/ubi/thema_spec.rb +33 -0
- data/ubi.gemspec +39 -0
- metadata +232 -0
data/lib/ubi/datum.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Ubi
  # Supposed to be an HTML reader: a thin wrapper around a parsed document
  # that exposes a few XPath-based helpers.
  #
  # `data` only needs to respond to `#xpath`; the object returned by
  # `#xpath` must respond to `#text` and `#map`.
  # NOTE(review): presumably a Nokogiri document — confirm against callers.
  class Datum
    attr_accessor :data, :words, :links

    # @param data  [#xpath] parsed document to query
    # @param words [String] XPath whose matched text is stored in `words`
    # @param links [String] XPath whose matched nodes are stored in `links`,
    #   each node reduced to its `values` joined by a single space
    def initialize(data, words, links)
      @data = data
      @words = data.xpath(words).text
      @links = data.xpath(links).map { |a| a.values.join(' ') }
    end

    # Runs an XPath query against the wrapped document.
    #
    # @param path [String] XPath expression
    # @return whatever `data.xpath` returns
    def xpath(path)
      data.xpath(path)
    end

    # Returns the text of every `div` element found anywhere in the document.
    #
    # @param div [String] element name (interpolated after `//`)
    def read_div(div)
      data.xpath("//#{div}").text
    end

    # Maps every node matched by `list` to a struct whose members are `args`.
    #
    # The member names are splatted into the struct definition, exactly as
    # #read_table does; passing the array itself made Struct.new raise a
    # TypeError.
    #
    # @param list [String] XPath selecting the list items
    # @param args [Array<Symbol>] struct member names
    # @return [Array<Struct>]
    def read_list(list, args = [])
      s = struct_for(*args)
      data.xpath(list).map { |i| s.new(*i.xpath) }
    end

    # Maps every row matched by `table` to a struct whose members are `args`,
    # filled with the normalized text of the row's `td` cells.
    #
    # @param table [String] XPath selecting the rows
    # @param args  [Array<Symbol>] struct member names
    # @param subs  [String, Regexp] pattern removed from every cell
    # @return [Array<Struct>]
    def read_table(table, args = [], subs = '')
      s = struct_for(*args)
      data.xpath(table).map do |r|
        s.new(*r.xpath('td/text()').map { |t| normalize(t, subs) })
      end
    end

    # Stringifies `txt`, removes every occurrence of `subs` and trims
    # surrounding whitespace.
    #
    # @param txt  [#to_s]
    # @param subs [String, Regexp]
    # @return [String]
    def normalize(txt, subs = '')
      txt.to_s.gsub(subs, '').strip
    end

    private

    # Builds (and remembers in @struct) an anonymous Struct class with the
    # given member names.
    def struct_for(*keys)
      @struct = Struct.new(*keys)
    end
  end
end
|
data/lib/ubi/impero.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'paint'
|
2
|
+
|
3
|
+
module Ubi
  # Impero: I command!
  #
  # Thor command-line interface for Ubi.
  class Impero < Thor
    class_option :verbose, type: :boolean, aliases: :v

    # desc 'init', 'creates settings on ~'

    # The usage string names the required NAME argument so that the
    # generated help shows how the command must be invoked.
    desc 'find NAME', 'Lookup for something'
    long_desc <<-LONG

    Find something based only on name

    LONG
    option :address, type: :string, desc: "Subject's address"
    # Builds an Artifex for +name+ and calls its #spec.
    #
    # @param name [String] subject to look up
    def find(name)
      Ubi::Artifex.new(name).spec
    end
  end
end
|
data/lib/ubi/memoria.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Memoria Base
    #
    # Common behaviour for every memoria (a kind of value extracted from
    # text). Subclasses register themselves in `Ubi.memorias` when they are
    # defined and are expected to provide a class-level `regex`.
    class Base
      include ActiveModel::Validations
      attr_accessor :value

      # @param value  [String] the raw extracted value
      # @param aranea [Object, nil] stored in @aranea (not read in this class)
      # @param thema  [Object, nil] stored in @thema (not read in this class)
      def initialize(value, aranea = nil, thema = nil)
        @value = value
        @aranea = aranea
        @thema = thema
      end

      # Format for #to_s: the value in lower case.
      def format
        value.downcase
      end

      def to_s
        format
      end

      class << self
        #
        # Account for memorias: every subclass is appended to Ubi.memorias.
        #
        # `super` is called first so that inheritance hooks contributed by
        # included modules (ActiveModel::Validations defines one) still run.
        #
        # Raises RuntimeError when a memoria with the same key is already
        # registered (Array#include? goes through the class-level #==).
        #
        def inherited(base)
          super
          raise "Already defined #{base.key}" if Ubi.memorias.include?(base)
          puts "With memoria #{base}"
          Ubi.memorias << base
        end

        #
        # Returns the plain text to be scanned.
        #
        # Accepts a String, an object exposing `#data` whose result responds
        # to `#text` (such as Ubi::Datum), or an object responding to
        # `#text` itself. Anything else raises RuntimeError.
        #
        def extract_text(datum)
          return datum if datum.is_a?(String)
          return datum.data.text if datum.respond_to?(:data)
          return datum.text if datum.respond_to?(:text)
          raise "Can't parse `#{datum.class}`"
        end

        #
        # Scans the text of `datum` with the class regex and wraps each
        # match in a new instance.
        #
        # Raises RuntimeError when the class defines no `regex`. Works with
        # patterns with or without capture groups: String#scan yields arrays
        # in the first case and plain strings in the second.
        #
        def parse(datum)
          raise "Not implemented by #{self}" unless respond_to?(:regex) && regex
          extract_text(datum).scan(regex).map { |r| new(Array(r).first) }
        end

        #
        # Symbol identifying the memoria: last segment of the class name,
        # lower-cased (memoized in @key).
        #
        def key
          @key ||= to_s.split('::').last.downcase.to_sym
        end

        #
        # Human-readable name of the memoria: last segment of the class name
        #
        def name
          to_s.split('::').last
        end

        # Plural form of the key, as a String.
        def plural
          "#{key}s"
        end

        # Two memoria classes are equal when their keys are equal; anything
        # that does not respond to `key` is never equal.
        def ==(other)
          return false unless other.respond_to?(:key)
          key == other.key
        end
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Ubi
  module Memoria
    # An address in this world
    class Address < Base
      # Characters that separate the parts of an address.
      DIVIDERS = /[,\-\|\/]/
      # A divider only splits when at least one of its neighbours is not a
      # digit, so digit-divider-digit sequences stay together.
      SPLIT = /(?<=\D)#{DIVIDERS}|#{DIVIDERS}(?=\D)/
      # Per-country vocabulary and zip patterns.
      REGEXES = {
        br: {
          prefix: %w( r rua av avenida pç pça praça pc pca praca tv travessa est estrada rod rodovia ),
          number: %w( n no nº num numero km ),
          ext: %w( comp obs ap apto apart apartamento andar ),
          zip: /\d{5}[-]\d{3}/
        },
        us: {
          prefix: %w( st street av avenue road ),
          zip: /\d{5}/
        }
      }.freeze

      attr_accessor :name, :parts, :words, :zip, :place, :number,
                    :city, :region, :nation, :extra

      #
      # Init: sanitizes the value, then splits it into parts (on SPLIT) and
      # each part into words (on whitespace).
      #
      # `_location` is accepted but not used yet; zip/region/number
      # extraction is still to be done:
      #   @zip = value.match(REGEXES[location][:zip])
      #   @region = value.match(/\W([A-Z]{2})\W/)[1]
      #   @number = value.match(/\w*\d+\w*/)
      #
      def initialize(val, _location = :br)
        @value = Address.sanitize(val)
        @parts = value.split(SPLIT).map { |v| v.strip.chomp }
        @words = parts.map { |pt| pt.split(/\s+/) }
      end

      #
      # Formats the address for `location`.
      #
      # Only a [pattern, replacement] pair can be applied with String#sub.
      # The current templates are placeholder strings, so the sanitized
      # value is returned as is; splatting such a string into #sub raised
      # ArgumentError, which also broke the inherited #to_s.
      #
      def format(location = :br)
        template = self.class.formats[location]
        return value unless template.is_a?(Array)
        value.sub(*template)
      end

      class << self
        #
        # Sanitizing
        #
        # - runs of whitespace collapse into a single space
        # - the two-character sequence backslash + "n" becomes "-"
        # - one optional whitespace on each side of a divider is dropped
        #
        # NOTE(review): real newlines are already turned into spaces by the
        # first step, so only a literal "\n" sequence becomes "-" — confirm
        # this is the intent.
        #
        def sanitize(value)
          value.gsub(/\s+/, ' ').gsub(/\\n/, '-')
               .gsub(/\s?(#{DIVIDERS})\s?/, '\1')
        end

        # Output templates per location (placeholders, not applied yet).
        def formats
          {
            br: '%a, %n - %c %z %r'
          }
        end

        # Captures from the first :br street prefix up to the end of the
        # line, case-insensitively.
        # NOTE(review): the prefixes are not anchored on word boundaries, so
        # a lone "r" inside another word also starts a match.
        def regex
          /((?:#{REGEXES[:br][:prefix].join('|')}).*)/i
        end

        def plural
          :addresses
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Show me your papers!
    #
    # https://en.wikipedia.org/wiki/National_identification_number
    #
    # br: CNPJ
    # us: SSN
    # cl: RUN/RUT
    #
    class Document < Base
      #
      # Init, remove non word chars
      #
      def initialize(value)
        @value = value.gsub(/\W/, '')
      end

      #
      # Re-inserts the punctuation used by `location`.
      #
      # Raises ArgumentError for a location without a known format.
      #
      def format(location = :br)
        pattern, replacement = self.class.formats.fetch(location) do
          raise ArgumentError, "Unknown document location `#{location}`"
        end
        value.sub(pattern, replacement)
      end

      #
      # Class methods
      #
      class << self
        # Patterns that find a document inside free text. Each one wraps the
        # whole document in a capture group, which is what String#scan
        # callers such as Base.parse read.
        def regexes
          {
            br: /(\d{14}|\d{2}\.?\d{3}\.?\d{3}\/?\d{4}[-]?\d{2})/,
            cl: /(\d{2}\.\d{3}\.\d{3}[-][0-9kK])/,
            us: /(\d{3}[-]\d{2}[-]\d{4})/
          }
        end

        # [pattern, replacement] pairs applied to the stripped value.
        # The `cl` check character may be a digit or k/K, matching what the
        # `cl` regex accepts.
        def formats
          {
            br: [/(\d{2})(\d{3})(\d{3})(\d{4})(\d{2})/, '\1.\2.\3/\4-\5'],
            cl: [/(\d{2})(\d{3})(\d{3})([0-9kK])/, '\1.\2.\3-\4'],
            us: [/(\d{3})(\d{2})(\d{4})/, '\1-\2-\3']
          }
        end

        # Pattern for `location` (nil when the location is unknown).
        def regex(location = :br)
          regexes[location]
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ubi
  module Memoria
    # An Electronic Mail
    class Email < Base
      #
      # Class methods
      #
      class << self
        #
        # Email regex
        #
        # Case-insensitive: with a lowercase-only pattern a mixed-case
        # address was either missed or cut at the first uppercase letter
        # (e.g. "JOHN.doe@x.com" produced "doe@x.com"). Base#format
        # lower-cases the value afterwards.
        #
        def regex
          %r{([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)}i # rubocop:disable Metrics/LineLength
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Ubi
  module Memoria
    # A Phone! mobile? landline? who is calling???
    class Phone < Base
      attr_reader :number, :chunk

      # @param chunk [String] raw text containing the phone number
      # @param hint  [Object, nil] passed as second argument to
      #   Phonelib.parse (presumably a country hint — confirm)
      def initialize(chunk, hint = nil)
        @hint = hint
        @chunk = chunk
        # Keep the inherited `value` accessor meaningful instead of nil.
        @value = chunk
        parse_number
      end

      # Parses the digits of the chunk with Phonelib and stores the result
      # in @number.
      def parse_number
        @number = Phonelib.parse(chunk.gsub(/\D/, ''), @hint)
      end

      # National representation of the parsed number.
      def to_s
        number && number.national
      end

      # International representation of the parsed number.
      def rfc
        number && number.international
      end

      class << self
        # http://rubular.com/r/tEHB6KcZzk
        #
        # The number must be delimited by whitespace or the string edges.
        # The delimiters are asserted with lookarounds instead of being
        # consumed, so two numbers separated by a single space are both
        # found by String#scan.
        def regex
          /(?<!\S)((?:\+\(?\d{1,3}\W)?[\._\-\/\s]*\(?\s*?\d{2,3}\s*?\)?[\._\-\/\s]*\d{3,5}[\._\-\/\s]*\d{4,5})(?!\S)/
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Ubi
  module Memoria
    # A site, url and title?
    class Site < Base
      #
      # Prefix http:// if there isn't one defined
      #
      # Only a leading http:// or https:// counts as a scheme; a value that
      # merely contains "http" (e.g. "httpbin.org") is still prefixed.
      #
      def format
        value =~ %r{\Ahttps?://}i ? value : "http://#{value}"
      end

      class << self
        #
        # Regex only for *.tld
        #
        # Earlier attempts, kept for reference:
        #   %r{https?://((?:\w+[\./]?)+)(?:/|\.)}
        #   %r{(?:https?\://)?(?:www\.)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}}
        #
        # http://www.regexr.com/3bkne
        #
        def regex
          /([(https?):\/\/(www\.)?a-zA-Z0-9@:%\._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&\/\/?=]*))/
        end

        def key
          :site
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Social account
    class Social < Base
      class << self
        # Known social applications and their host names.
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        # Matches an http(s) URL on any of the known hosts (subdomains
        # allowed) and captures it from the host to the end of the line.
        #
        # The hosts come from #apps; the pattern used to interpolate an
        # undefined `url`, which raised NameError on every call.
        def regex
          hosts = apps.values.map { |host| Regexp.escape(host) }.join('|')
          %r{https?://(?:\w+\.)*((?:#{hosts})/.*)}
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Ubi
  module Memoria
    # Whoami
    class Who < Base
      class << self
        # Known applications and their host names.
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        # Matches an http(s) URL on any of the known hosts (subdomains
        # allowed) and captures it from the host to the end of the line.
        #
        # The hosts come from #apps; the pattern used to interpolate an
        # undefined `url`, which raised NameError on every call.
        def regex
          hosts = apps.values.map { |host| Regexp.escape(host) }.join('|')
          %r{https?://(?:\w+\.)*((?:#{hosts})/.*)}
        end
      end
    end
  end
end
|
data/lib/ubi/thema.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Ubi
  #
  # Thema -> subject, matter, case
  #
  # Holds the name being investigated, the urls to visit and a cache of
  # what each memoria found, keyed by memoria class.
  #
  class Thema
    include ActiveModel::AttributeMethods
    include ActiveModel::Serialization
    include ActiveModel::Dirty

    attr_accessor :name, :urls, :opts, :ascii, :clean

    # @param name [String] subject name
    # @param urls [Array] urls handed to Aranea.new in #araneas
    # @param opts [Hash] initial values, looked up by memoria class
    def initialize(name, urls = [], opts = {})
      @name = name
      @urls = urls
      @opts = opts
      @cache = Ubi.memorias.each_with_object({}) do |memoria, cache|
        cache[memoria] = opts[memoria]
      end
      reduce_names
    end

    # One Aranea per url, built lazily and memoized.
    def araneas
      @araneas ||= urls.map { |u| Aranea.new(self, u) }
    end

    # Derives the lower-cased and the cleaned (non-word characters replaced
    # by spaces) variants of the name.
    # NOTE(review): @ascii currently holds the same lower-cased text as
    # @downcase; no transliteration happens here.
    def reduce_names
      @ascii = name.mb_chars.downcase
      @downcase = name.mb_chars.downcase
      @clean = @downcase.gsub(/\W/, ' ')
    end

    # One lazily initialised array reader per registered memoria, named
    # after its plural. Interpolation is used to build the instance variable
    # name because `plural` may be a String or a Symbol.
    Ubi.memorias.each do |memoria|
      ivar = "@#{memoria.plural}"
      define_method memoria.plural do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end
    end

    # Cached matches for a memoria, given either the memoria class itself
    # (how the cache is keyed) or its key symbol.
    def [](arg)
      return @cache[arg] if @cache.key?(arg)
      memoria = @cache.keys.find { |m| m.respond_to?(:key) && m.key == arg }
      memoria ? @cache[memoria] : nil
    end

    # Prints the subject followed by each memoria name and what is cached
    # for it.
    def spec
      puts self
      Ubi.memorias.each do |memoria|
        print Paint[memoria.name, :black]
        puts self[memoria]
      end
    end

    # Instantiates consultor class `a` for this thema, runs every memoria
    # parser over its datum, caches the matches and prints the non-empty
    # ones.
    def try_consultor(a)
      a = a.new(self)
      Ubi.memorias.each do |m|
        puts Paint["Trying to find #{m} in #{a.class}", :green]
        @cache[m] = matches = m.parse(a.datum)
        puts matches if matches && !matches.empty?
      end
    end

    def to_s
      name
    end
  end
end
|