ubi 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +11 -0
  5. data/Guardfile +16 -0
  6. data/MIT-LICENSE +19 -0
  7. data/README.md +57 -0
  8. data/Rakefile +9 -0
  9. data/bin/ubi +10 -0
  10. data/lib/ubi.rb +37 -0
  11. data/lib/ubi/aranea.rb +42 -0
  12. data/lib/ubi/artifex.rb +28 -0
  13. data/lib/ubi/consultor.rb +61 -0
  14. data/lib/ubi/consultores/bing.rb +26 -0
  15. data/lib/ubi/consultores/duckduckgo.rb +26 -0
  16. data/lib/ubi/consultores/facebook.rb +6 -0
  17. data/lib/ubi/consultores/foursquare.rb +6 -0
  18. data/lib/ubi/consultores/google.rb +24 -0
  19. data/lib/ubi/consultores/linkedin.rb +0 -0
  20. data/lib/ubi/consultores/twitter.rb +6 -0
  21. data/lib/ubi/consultores/wikipedia.rb +6 -0
  22. data/lib/ubi/consultores/yahoo.rb +26 -0
  23. data/lib/ubi/datum.rb +43 -0
  24. data/lib/ubi/impero.rb +20 -0
  25. data/lib/ubi/memoria.rb +72 -0
  26. data/lib/ubi/memorias/address.rb +71 -0
  27. data/lib/ubi/memorias/document.rb +50 -0
  28. data/lib/ubi/memorias/email.rb +19 -0
  29. data/lib/ubi/memorias/phone.rb +33 -0
  30. data/lib/ubi/memorias/site.rb +29 -0
  31. data/lib/ubi/memorias/social.rb +20 -0
  32. data/lib/ubi/memorias/who.rb +20 -0
  33. data/lib/ubi/thema.rb +62 -0
  34. data/lib/ubi/version.rb +4 -0
  35. data/spec/fixtures/email.txt +5 -0
  36. data/spec/fixtures/mobile.txt +17 -0
  37. data/spec/fixtures/page.txt +21 -0
  38. data/spec/fixtures/phone.txt +17 -0
  39. data/spec/fixtures/site.txt +21 -0
  40. data/spec/spec_helper.rb +40 -0
  41. data/spec/ubi/aranea_spec.rb +19 -0
  42. data/spec/ubi/artifex_spec.rb +4 -0
  43. data/spec/ubi/memorias/address_spec.rb +56 -0
  44. data/spec/ubi/memorias/document_spec.rb +48 -0
  45. data/spec/ubi/memorias/email_spec.rb +59 -0
  46. data/spec/ubi/memorias/phone_spec.rb +79 -0
  47. data/spec/ubi/memorias/site_spec.rb +82 -0
  48. data/spec/ubi/thema_spec.rb +33 -0
  49. data/ubi.gemspec +39 -0
  50. metadata +232 -0
@@ -0,0 +1,43 @@
1
module Ubi
  # Thin reader around a parsed document.
  #
  # `data` only needs to respond to `#xpath`; it is presumably a Nokogiri
  # document or node -- confirm against callers.
  class Datum
    attr_accessor :data, :words, :links

    # @param data  [#xpath] parsed document to read from
    # @param words [String] XPath whose matches' text is stored in `words`
    # @param links [String] XPath whose matches are stored in `links`, each
    #   as its `#values` joined with a single space
    def initialize(data, words, links)
      @data = data
      @words = data.xpath(words).text
      @links = data.xpath(links).map { |a| a.values.join(' ') }
    end

    # Runs an XPath query against the underlying document.
    def xpath(path)
      data.xpath(path)
    end

    # Text of every `div` element found anywhere in the document.
    #
    # @param div [String] element name, interpolated into `//<div>`
    def read_div(div)
      data.xpath("//#{div}").text
    end

    # Maps every node matched by `list` to a Struct built from `args`.
    #
    # @param list [String] XPath selecting the items
    # @param args [Array<Symbol>] member names of the resulting Struct
    # @return [Array<Struct>]
    def read_list(list, args = [])
      # Splat the member names: passing the Array itself made Struct.new
      # raise TypeError (read_table already splatted them).
      item = struct_for(*args)
      # NOTE(review): `xpath` is called without a path, as in the original;
      # confirm which child nodes were meant to fill the Struct.
      data.xpath(list).map { |i| item.new(*i.xpath) }
    end

    # Maps every row matched by `table` to a Struct built from `args`,
    # filled with the normalized text of the row's `td` cells.
    #
    # @param table [String] XPath selecting the rows
    # @param args  [Array<Symbol>] member names of the resulting Struct
    # @param subs  [String, Regexp] pattern removed from every cell
    # @return [Array<Struct>]
    def read_table(table, args = [], subs = '')
      row = struct_for(*args)
      data.xpath(table).map do |r|
        row.new(*r.xpath('td/text()').map { |t| normalize(t, subs) })
      end
    end

    # Stringifies `txt`, removes every occurrence of `subs` and trims
    # surrounding whitespace (which includes trailing newlines).
    def normalize(txt, subs = '')
      txt.to_s.gsub(subs, '').strip
    end

    private

    # Anonymous Struct class with the given member names.
    def struct_for(*keys)
      Struct.new(*keys)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'paint'
2
+
3
module Ubi
  # Impero ("I command!"): the Thor-based command line entry point.
  class Impero < Thor
    class_option :verbose, type: :boolean, aliases: :v

    # desc 'init', 'creates settings on ~'
    desc 'find', 'Lookup for something'
    long_desc <<-LONG

    Find something based only on name

    LONG
    option :address, type: :string # 'Subject\'s address'
    # `find NAME`: builds an Artifex for the given name and calls #spec on it.
    # NOTE(review): the :address option is declared but not read here.
    def find(name)
      artifex = Ubi::Artifex.new(name)
      artifex.spec
    end
  end
end
@@ -0,0 +1,72 @@
1
module Ubi
  module Memoria
    # Base class of every memoria (a kind of value extracted from text).
    #
    # Subclasses register themselves in `Ubi.memorias` when they are defined
    # and are expected to provide a class-level `regex` whose first capture
    # group (or whole match) is the extracted value.
    class Base
      include ActiveModel::Validations
      attr_accessor :value

      # @param value  [String] the extracted raw value
      # @param aranea [Object, nil] stored as-is in @aranea
      # @param thema  [Object, nil] stored as-is in @thema
      def initialize(value, aranea = nil, thema = nil)
        @value = value
        @aranea = aranea
        @thema = thema
      end

      # Format for #to_s: the downcased value.
      def format
        value.downcase
      end

      def to_s
        format
      end

      class << self
        #
        # Account for memorias: every subclass is appended to Ubi.memorias.
        # Raises if an equal memoria (same key, see #==) is already there.
        #
        def inherited(base)
          # Keep the hook chain intact; this method otherwise shadows the
          # inherited hook contributed by ActiveModel::Validations.
          super
          fail "Already defined #{base.key}" if Ubi.memorias.include?(base)
          puts "With memoria #{base}"
          Ubi.memorias << base
        end

        #
        # Plain text of whatever was handed to .parse.
        #
        # Accepts a String, anything responding to #text, or anything
        # exposing a document through #data (as Ubi::Datum does). The
        # previous `when Nokogiri::HTML` branch could not match: that
        # constant is a module that documents do not include.
        #
        def extract_text(datum)
          return datum if datum.is_a?(String)
          return datum.text if datum.respond_to?(:text)
          return datum.data.text if datum.respond_to?(:data)
          fail "Can't parse `#{datum.class}`"
        end

        #
        # Scans the text of `datum` with the class regex and wraps every
        # match in a new instance.
        #
        def parse(datum)
          fail "Not implemented by #{self}" unless respond_to?(:regex) && regex
          # scan yields Arrays when the regex has groups and Strings when
          # it has none; Array() covers both shapes.
          extract_text(datum).scan(regex).map { |r| new(Array(r).first) }
        end

        #
        # Symbol key of the memoria: downcased last segment of the class
        # name, e.g. Ubi::Memoria::Email => :email
        #
        def key
          @key ||= to_s.split('::').last.downcase.to_sym
        end

        #
        # Human-readable name of the memoria: last segment of the class name
        #
        def name
          to_s.split('::').last
        end

        # Plural of the key, built by appending "s"
        def plural
          "#{key}s"
        end

        # Two memorias are equal when their keys are equal; anything that
        # does not respond to #key is never equal.
        def ==(other)
          return false unless other.respond_to?(:key)
          key == other.key
        end
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module Ubi
  module Memoria
    # An address in this world
    class Address < Base
      # Characters treated as separators between address parts
      DIVIDERS = /[,\-\|\/]/
      # A divider only splits when at least one neighbour is a non-digit,
      # so digit-divider-digit sequences (e.g. "12345-678") stay together
      SPLIT = /(?<=\D)#{DIVIDERS}|#{DIVIDERS}(?=\D)/
      # Per-country vocabulary; only the :br prefixes are used by .regex
      REGEXES = {
        br: {
          prefix: %w( r rua av avenida pç pça praça pc pca praca tv travessa est estrada rod rodovia ),
          number: %w( n no nº num numero km ),
          ext: %w( comp obs ap apto apart apartamento andar ),
          zip: /\d{5}[-]\d{3}/
        },
        us: {
          prefix: %w( st street av avenue road ),
          zip: /\d{5}/
        }
      }.freeze

      # Only `parts` and `words` are assigned by #initialize; the remaining
      # accessors are declared but not populated in this class.
      attr_accessor :name, :parts, :words, :zip, :place, :number,
                    :city, :region, :nation, :extra

      #
      # Init: sanitizes the value, splits it into parts on SPLIT and each
      # part into words on whitespace.
      #
      # @param val [String] raw address text
      # @param _location [Symbol] currently unused
      #
      def initialize(val, _location = :br)
        @value = Address.sanitize(val)
        # @zip    = value.match(REGEXES[location][:zip])
        # @region = value.match(/\W([A-Z]{2})\W/)[1]
        # @number = value.match(/\w*\d+\w*/)

        @parts = value.split(SPLIT).map(&:strip)
        @words = parts.map { |pt| pt.split(/\s+/) }
      end

      #
      # Applies the [pattern, replacement] pair registered for `location`.
      #
      # .formats currently holds a template String rather than such a pair;
      # splatting it into String#sub raised ArgumentError (and so did #to_s).
      # Without a usable pair the sanitized value is returned unchanged.
      #
      def format(location = :br)
        pattern, replacement = self.class.formats[location]
        return value unless pattern.is_a?(Regexp) && replacement
        value.sub(pattern, replacement)
      end

      class << self
        #
        # Sanitizing, in order:
        #
        # 1. every whitespace run becomes a single space
        # 2. the two-character sequence backslash + "n" becomes "-"
        # 3. whitespace directly around a divider is dropped (" - " -> "-")
        #
        # NOTE(review): real newlines are already turned into spaces by
        # step 1; confirm whether they were meant to become "-" as well.
        #
        def sanitize(value)
          value.gsub(/\s+/, ' ').gsub(/\\n/, '-')
               .gsub(/\s?(#{DIVIDERS})\s?/, '\1')
        end

        # Output templates per location.
        # NOTE(review): the placeholders (%a, %n, ...) are not interpreted
        # anywhere in this class.
        def formats
          {
            br: '%a, %n - %c %z %r'
          }
        end

        # Captures from the first :br street prefix to the end of the line,
        # case-insensitively.
        # NOTE(review): the prefixes are not word-anchored, so a bare "r"
        # inside any word also starts a match -- confirm this is intended.
        def regex
          /((?:#{REGEXES[:br][:prefix].join('|')}).*)/i
        end

        def plural
          :addresses
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module Ubi
  module Memoria
    # Show me your papers!
    #
    # https://en.wikipedia.org/wiki/National_identification_number
    #
    # br: CNPJ
    # us: SSN
    # cl: RUN/RUT
    #
    class Document < Base
      #
      # Init, remove non word chars
      #
      def initialize(value)
        @value = value.gsub(/\W/, '')
      end

      #
      # Re-inserts the punctuation of the given location into the bare
      # value. Returns the value unchanged when the location has no
      # registered format (previously String#sub raised ArgumentError).
      #
      def format(location = :br)
        pattern, replacement = self.class.formats[location]
        return value unless pattern
        value.sub(pattern, replacement)
      end

      #
      # Class methods
      #
      class << self
        # Extraction regexes per location. Each one has a single capture
        # group, which is what Base.parse reads from String#scan.
        def regexes
          {
            br: /(\d{14}|\d{2}\.?\d{3}\.?\d{3}\/?\d{4}[-]?\d{2})/,
            cl: /(\d{2}\.\d{3}\.\d{3}[-][0-9kK])/,
            us: /(\d{3}[-]\d{2}[-]\d{4})/
          }
        end

        # [pattern, replacement] pairs applied to the bare value by #format.
        # The :cl check digit accepts k/K, like the extraction regex above.
        def formats
          {
            br: [/(\d{2})(\d{3})(\d{3})(\d{4})(\d{2})/, '\1.\2.\3/\4-\5'],
            cl: [/(\d{2})(\d{3})(\d{3})([0-9kK])/, '\1.\2.\3-\4'],
            us: [/(\d{3})(\d{2})(\d{4})/, '\1-\2-\3']
          }
        end

        # Extraction regex for `location`; nil for an unknown location.
        def regex(location = :br)
          regexes[location]
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Ubi
  module Memoria
    # An Electronic Mail
    class Email < Base
      #
      # Class methods
      #
      class << self
        #
        # Email regex: one capture group holding the whole address.
        #
        # Matching is case-insensitive so that addresses written with
        # capitals are found too; Base#format downcases the value anyway.
        #
        def regex
          %r{([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)}i # rubocop:disable Metrics/LineLength
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Ubi
  module Memoria
    # A phone number found in text, parsed through Phonelib.
    class Phone < Base
      attr_reader :number, :chunk

      # @param chunk [String] raw text the number was found in
      # @param hint  [Object, nil] forwarded to Phonelib.parse as second
      #   argument
      def initialize(chunk, hint = nil)
        @hint = hint
        @chunk = chunk
        parse_number
      end

      # Strips every non-digit from the chunk and hands the digits, together
      # with the hint, to Phonelib; the result is kept in `number`.
      def parse_number
        digits = chunk.gsub(/\D/, '')
        @number = Phonelib.parse(digits, @hint)
      end

      # National representation, as given by the parsed number
      def to_s
        number ? number.national : number
      end

      # International representation, as given by the parsed number
      def rfc
        number ? number.international : number
      end

      class << self
        # Extraction regex; the single capture group is the phone chunk.
        # http://rubular.com/r/tEHB6KcZzk
        def regex
          /(?:^|\s)((?:\+\(?\d{1,3}\W)?[\._\-\/\s]*\(?\s*?\d{2,3}\s*?\)?[\._\-\/\s]*\d{3,5}[\._\-\/\s]*\d{4,5})(?:\s|$)/
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Ubi
  module Memoria
    # A site, url and title?
    class Site < Base
      #
      # Prefix http:// if the value does not already start with a
      # http:// or https:// scheme.
      #
      # The check is anchored at the start of the value: the former
      # unanchored /http/ also skipped the prefix for values that merely
      # contain "http" somewhere (e.g. "myhttpsite.com").
      #
      def format
        value =~ %r{\Ahttps?://}i ? value : "http://#{value}"
      end

      class << self
        #
        # Regex only for *.tld; the single capture group is the whole match.
        # http://www.regexr.com/3bkne
        #
        def regex
          /([(https?):\/\/(www\.)?a-zA-Z0-9@:%\._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&\/\/?=]*))/
        end

        def key
          :site
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Ubi
  module Memoria
    # Social account
    class Social < Base
      class << self
        # Known social applications and their host names
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        #
        # Matches an http(s) URL on any of the .apps hosts (optionally
        # behind subdomains) and captures "host/rest-of-line".
        #
        # The hosts come from .apps; the regex used to interpolate an
        # undefined `url`, which raised NameError on every call.
        #
        def regex
          hosts = Regexp.union(apps.values)
          %r{https?://(?:\w+\.)*(#{hosts}/.*)}
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Ubi
  module Memoria
    # Whoami
    class Who < Base
      class << self
        # Known applications and their host names
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        #
        # Matches an http(s) URL on any of the .apps hosts (optionally
        # behind subdomains) and captures "host/rest-of-line".
        #
        # The hosts come from .apps; the regex used to interpolate an
        # undefined `url`, which raised NameError on every call.
        #
        def regex
          hosts = Regexp.union(apps.values)
          %r{https?://(?:\w+\.)*(#{hosts}/.*)}
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Ubi
  #
  # Thema -> subject, matter, case
  #
  # Holds the name being looked up, the urls to crawl and a cache of what
  # was found, keyed by memoria key (e.g. :email).
  #
  class Thema
    include ActiveModel::AttributeMethods
    include ActiveModel::Serialization
    include ActiveModel::Dirty

    attr_accessor :name, :urls, :opts, :ascii, :clean

    # @param name [String] subject name
    # @param urls [Array] one Aranea is built per entry (see #araneas)
    # @param opts [Hash] initial cache values, looked up by memoria key
    #   first and by memoria class as a fallback
    def initialize(name, urls = [], opts = {})
      @name = name
      @urls = urls
      @opts = opts
      # Keyed by memoria.key so that #spec, #[] and #try_consultor all
      # address the same entry; the cache used to be written by class and
      # read by key, which always came back nil.
      @cache = Ubi.memorias.each_with_object({}) do |memoria, cache|
        cache[memoria.key] = opts[memoria.key] || opts[memoria]
      end
      reduce_names
    end

    # One Aranea per url, built on first use
    def araneas
      @araneas ||= urls.map { |u| Aranea.new(self, u) }
    end

    # Derives the downcased and the "clean" (non-word chars replaced by
    # spaces) variants of the name.
    # NOTE(review): @ascii is only downcased, not transliterated -- confirm
    # the intent.
    def reduce_names
      @ascii = name.mb_chars.downcase
      @downcase = name.mb_chars.downcase
      @clean = @downcase.gsub(/\W/, ' ')
    end

    # One lazily initialized Array reader per memoria, named after its
    # plural (e.g. #emails, #addresses).
    Ubi.memorias.each do |memoria|
      # Interpolate instead of '@' + plural: plural may be a Symbol
      # (Address.plural is), and String#+ raised TypeError on it.
      ivar = "@#{memoria.plural}"
      define_method(memoria.plural) do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end
    end

    # Cached matches for a memoria, given either its key or its class
    def [](arg)
      @cache[cache_key(arg)]
    end

    # Prints the thema followed by every memoria name and its cached value
    def spec
      puts self
      Ubi.memorias.each do |memoria|
        print Paint[memoria.name, :black]
        puts self[memoria.key]
      end
    end

    # Instantiates consultor class `a` for this thema and lets every
    # memoria parse its datum; matches are cached and printed when present.
    def try_consultor(a)
      a = a.new(self)
      Ubi.memorias.each do |m|
        puts Paint["Trying to find #{m} in #{a.class}", :green]
        @cache[m.key] = matches = m.parse(a.datum)
        puts matches if matches && !matches.empty?
      end
    end

    def to_s
      name
    end

    private

    # Memoria classes are stored under their key; anything else is used
    # as the key itself.
    def cache_key(arg)
      arg.respond_to?(:key) ? arg.key : arg
    end
  end
end