ubi 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +11 -0
  5. data/Guardfile +16 -0
  6. data/MIT-LICENSE +19 -0
  7. data/README.md +57 -0
  8. data/Rakefile +9 -0
  9. data/bin/ubi +10 -0
  10. data/lib/ubi.rb +37 -0
  11. data/lib/ubi/aranea.rb +42 -0
  12. data/lib/ubi/artifex.rb +28 -0
  13. data/lib/ubi/consultor.rb +61 -0
  14. data/lib/ubi/consultores/bing.rb +26 -0
  15. data/lib/ubi/consultores/duckduckgo.rb +26 -0
  16. data/lib/ubi/consultores/facebook.rb +6 -0
  17. data/lib/ubi/consultores/foursquare.rb +6 -0
  18. data/lib/ubi/consultores/google.rb +24 -0
  19. data/lib/ubi/consultores/linkedin.rb +0 -0
  20. data/lib/ubi/consultores/twitter.rb +6 -0
  21. data/lib/ubi/consultores/wikipedia.rb +6 -0
  22. data/lib/ubi/consultores/yahoo.rb +26 -0
  23. data/lib/ubi/datum.rb +43 -0
  24. data/lib/ubi/impero.rb +20 -0
  25. data/lib/ubi/memoria.rb +72 -0
  26. data/lib/ubi/memorias/address.rb +71 -0
  27. data/lib/ubi/memorias/document.rb +50 -0
  28. data/lib/ubi/memorias/email.rb +19 -0
  29. data/lib/ubi/memorias/phone.rb +33 -0
  30. data/lib/ubi/memorias/site.rb +29 -0
  31. data/lib/ubi/memorias/social.rb +20 -0
  32. data/lib/ubi/memorias/who.rb +20 -0
  33. data/lib/ubi/thema.rb +62 -0
  34. data/lib/ubi/version.rb +4 -0
  35. data/spec/fixtures/email.txt +5 -0
  36. data/spec/fixtures/mobile.txt +17 -0
  37. data/spec/fixtures/page.txt +21 -0
  38. data/spec/fixtures/phone.txt +17 -0
  39. data/spec/fixtures/site.txt +21 -0
  40. data/spec/spec_helper.rb +40 -0
  41. data/spec/ubi/aranea_spec.rb +19 -0
  42. data/spec/ubi/artifex_spec.rb +4 -0
  43. data/spec/ubi/memorias/address_spec.rb +56 -0
  44. data/spec/ubi/memorias/document_spec.rb +48 -0
  45. data/spec/ubi/memorias/email_spec.rb +59 -0
  46. data/spec/ubi/memorias/phone_spec.rb +79 -0
  47. data/spec/ubi/memorias/site_spec.rb +82 -0
  48. data/spec/ubi/thema_spec.rb +33 -0
  49. data/ubi.gemspec +39 -0
  50. metadata +232 -0
@@ -0,0 +1,43 @@
module Ubi
  # Supposed to be an HTML reader: a thin wrapper around a parsed document.
  #
  # `data` only needs to respond to `xpath`; the results of `xpath` are
  # expected to respond to `text` and `map`.
  # NOTE(review): presumably a Nokogiri document/node -- confirm with callers.
  class Datum
    attr_accessor :data, :words, :links

    # data  - parsed document (anything responding to `xpath`).
    # words - XPath whose matched text is stored in `words`.
    # links - XPath of nodes; each node's `values` are joined with a
    #         single space and stored in `links`.
    def initialize(data, words, links)
      @data = data
      @words = data.xpath(words).text
      @links = data.xpath(links).map { |a| a.values.join(' ') }
    end

    # Delegates an XPath query to the wrapped document.
    def xpath(path)
      data.xpath(path)
    end

    # Text of every `div` element (any element name works) in the document.
    def read_div(div)
      data.xpath("//#{div}").text
    end

    # Maps every node matched by `list` to a Struct built from `args`.
    #
    # list - XPath selecting the items.
    # args - member names for the Struct (at least one is required).
    #
    # NOTE(review): each item is queried with a bare `xpath` call (no
    # expression), kept as in the original -- confirm what it should select.
    def read_list(list, args = [])
      # The keys must be splatted: Struct.new rejects an Array argument.
      s = struct_for(*args)
      data.xpath(list).map { |i| s.new(*i.xpath) }
    end

    # Maps every row matched by `table` to a Struct built from `args`,
    # filled with the normalized text of the row's `td` cells.
    #
    # table - XPath selecting the rows.
    # args  - member names for the Struct (at least one is required).
    # subs  - String or Regexp removed from every cell before stripping.
    def read_table(table, args = [], subs = '')
      s = struct_for(*args)
      data.xpath(table).map do |r|
        s.new(*r.xpath('td/text()').map { |t| normalize(t, subs) })
      end
    end

    # Removes every occurrence of `subs` from `txt` and strips surrounding
    # whitespace (which includes trailing newlines).
    def normalize(txt, subs = '')
      txt.to_s.gsub(subs, '').strip
    end

    private

    # Builds (and remembers in @struct) an anonymous Struct for `keys`.
    #
    # Keys are flattened and converted to symbols because Struct.new treats
    # a leading String as a constant name, so a lowercase string key would
    # raise NameError.
    def struct_for(*keys)
      members = keys.flatten.map(&:to_sym)
      fail ArgumentError, 'at least one key is required' if members.empty?
      @struct = Struct.new(*members)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'paint'
2
+
module Ubi
  # Impero ("I command!"): the Thor-based command line interface.
  class Impero < Thor
    class_option :verbose, aliases: :v, type: :boolean

    # desc 'init', 'creates settings on ~'
    desc 'find', 'Lookup for something'
    long_desc "\nFind something based only on name\n\n"
    method_option :address, type: :string # 'Subject\'s address'
    # Builds an Artifex for `name` and invokes its `spec`.
    def find(name)
      artifex = Ubi::Artifex.new(name)
      artifex.spec
    end
  end
end
@@ -0,0 +1,72 @@
module Ubi
  module Memoria
    # Base class of every memoria (a kind of value that can be extracted
    # from text). Subclasses register themselves in `Ubi.memorias` when
    # they are defined and normally provide a class-level `regex`.
    class Base
      include ActiveModel::Validations
      attr_accessor :value

      # value  - the raw extracted value.
      # aranea - optional, stored in @aranea.
      # thema  - optional, stored in @thema.
      def initialize(value, aranea = nil, thema = nil)
        @value = value
        @aranea = aranea
        @thema = thema
      end

      # Format for #to_s
      def format
        value.downcase
      end

      def to_s
        format
      end

      class << self
        #
        # Account for memorias: every subclass is appended to
        # `Ubi.memorias`; a duplicate (same `key`, see `==`) is rejected.
        #
        def inherited(base)
          # Keep the hook chain intact for modules that also define
          # `inherited` (ActiveModel::Validations is included above).
          super
          fail "Already defined #{base.key}" if Ubi.memorias.include?(base)
          puts "With memoria #{base}"
          Ubi.memorias << base
        end

        # Returns the plain text to scan.
        #
        # Accepts a String, an object exposing `data` (such as a
        # Ubi::Datum, whose `data.text` is used) or anything responding
        # to `text`. Raises RuntimeError for anything else.
        def extract_text(datum)
          case datum
          when String then datum
          else
            return datum.data.text if datum.respond_to?(:data)
            return datum.text if datum.respond_to?(:text)
            fail "Can't parse `#{datum.class}`"
          end
        end

        # Scans the text of `datum` with `regex` and returns one instance
        # per match, built from the first capture group (or from the whole
        # match when the regex has no groups).
        #
        # Raises RuntimeError when the class does not provide a regex.
        def parse(datum)
          fail "Not implemented by #{self}" unless regex
          # String#scan yields Strings for group-less regexes and Arrays
          # otherwise; Array() handles both.
          extract_text(datum).scan(regex).map { |r| new(Array(r).first) }
        end

        # Pattern used by `parse`; nil here, subclasses override it.
        def regex
          nil
        end

        #
        # Symbol identifying the memoria: the downcased last segment of
        # the class name (memoized in @key).
        #
        def key
          @key ||= to_s.split('::').last.downcase.to_sym
        end

        #
        # Human-readable name of the memoria: the last segment of the
        # class name.
        #
        def name
          to_s.split('::').last
        end

        # Naive plural of `key` (a String); irregular subclasses override.
        def plural
          "#{key}s"
        end

        # Two memoria classes are equal when their keys are equal.
        def ==(other)
          return false unless other.respond_to?(:key)
          key == other.key
        end
      end
    end
  end
end
@@ -0,0 +1,71 @@
module Ubi
  module Memoria
    # An address in this world
    class Address < Base
      DIVIDERS = /[,\-\|\/]/
      # Splits on a divider that touches at least one non-digit, so
      # digit-divider-digit sequences stay together.
      SPLIT = /(?<=\D)#{DIVIDERS}|#{DIVIDERS}(?=\D)/
      REGEXES = {
        br: {
          prefix: %w( r rua av avenida pç pça praça pc pca praca tv travessa est estrada rod rodovia ),
          number: %w( n no nº num numero km ),
          ext: %w( comp obs ap apto apart apartamento andar ),
          zip: /\d{5}[-]\d{3}/
        },
        us: {
          prefix: %w( st street av avenue road ),
          zip: /\d{5}/
        }
      }.freeze

      attr_accessor :name, :parts, :words, :zip, :place, :number,
                    :city, :region, :nation, :extra

      #
      # Sanitizes the value, then splits it into `parts` (on SPLIT) and
      # each part into `words` (on whitespace).
      #
      # val       - raw address String.
      # _location - currently unused.
      #
      def initialize(val, _location = :br)
        @value = Address.sanitize(val)
        # @zip = value.match(REGEXES[location][:zip])
        # @region = value.match(/\W([A-Z]{2})\W/)[1]
        # @number = value.match(/\w*\d+\w*/)

        @parts = value.split(SPLIT).map { |v| v.strip.chomp }
        @words = parts.map { |pt| pt.split(/\s+/) }
      end

      # Formats the value for `location`.
      #
      # When the entry in `formats` is a [pattern, replacement] pair it is
      # applied with String#sub. The current entries are plain template
      # strings, which cannot be passed to `sub` (it would raise
      # ArgumentError), so the sanitized value is returned unchanged.
      def format(location = :br)
        pattern, replacement = self.class.formats[location]
        return value unless pattern && replacement
        value.sub(pattern, replacement)
      end

      class << self
        #
        # Sanitizing
        #
        # whitespace runs          -> single space
        # literal backslash + "n"  -> "-"
        # spaces around a divider  -> removed
        #
        def sanitize(value)
          value.gsub(/\s+/, ' ').gsub(/\\n/, '-')
            .gsub(/\s?(#{DIVIDERS})\s?/, '\1')
        end

        # Output templates by location.
        # NOTE(review): the placeholders are not interpreted anywhere in
        # this class -- confirm the intended meaning before using them.
        def formats
          {
            br: '%a, %n - %c %z %r'
          }
        end

        # Captures everything from the first :br street prefix to the end
        # of the line, case-insensitively.
        def regex
          /((?:#{REGEXES[:br][:prefix].join('|')}).*)/i
        end

        def plural
          :addresses
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
module Ubi
  module Memoria
    # Show me your papers!
    #
    # https://en.wikipedia.org/wiki/National_identification_number
    #
    # br: CNPJ
    # us: SSN
    # cl: RUN/RUT
    #
    class Document < Base
      #
      # Init, remove non word chars
      #
      # aranea and thema are optional and forwarded to Base.
      #
      def initialize(value, aranea = nil, thema = nil)
        super(value.gsub(/\W/, ''), aranea, thema)
      end

      # Re-inserts the punctuation for `location` using the matching
      # [pattern, replacement] pair from `formats`.
      def format(location = :br)
        value.sub(*self.class.formats[location])
      end

      #
      # Class methods
      #
      class << self
        # Detection patterns by location. Every pattern has exactly one
        # capture group holding the whole document, so scan results have
        # the same shape for all locations.
        def regexes
          {
            br: /(\d{14}|\d{2}\.?\d{3}\.?\d{3}\/?\d{4}[-]?\d{2})/,
            cl: /(\d{2}\.\d{3}\.\d{3}[-][0-9kK])/,
            us: /(\d{3}[-]\d{2}[-]\d{4})/
          }
        end

        # [pattern, replacement] pairs applied to the stripped value.
        def formats
          {
            br: [/(\d{2})(\d{3})(\d{3})(\d{4})(\d{2})/, '\1.\2.\3/\4-\5'],
            # The check digit may be "k"/"K", as accepted by regexes[:cl].
            cl: [/(\d{2})(\d{3})(\d{3})([\dkK])/, '\1.\2.\3-\4'],
            us: [/(\d{3})(\d{2})(\d{4})/, '\1-\2-\3']
          }
        end

        # Detection pattern for `location` (nil when unknown).
        def regex(location = :br)
          regexes[location]
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Ubi
  module Memoria
    # An Electronic Mail
    class Email < Base
      #
      # Class methods
      #
      class << self
        #
        # Email regex: one capture group holding the whole address.
        #
        # Case-insensitive, so addresses written with capital letters are
        # found too (Base#format downcases the value when printing).
        #
        def regex
          %r{([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)}i # rubocop:disable Metrics/LineLength
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
module Ubi
  module Memoria
    # A Phone! mobile? landline? who is calling???
    class Phone < Base
      attr_reader :number, :chunk

      # chunk - raw text holding the phone number.
      # hint  - optional second argument handed to Phonelib.parse
      #         (presumably a country code -- confirm against Phonelib).
      def initialize(chunk, hint = nil)
        @hint = hint
        @chunk = chunk
        # Keep the inherited `value` usable; it used to stay nil.
        @value = chunk
        parse_number
      end

      # Parses the digits of `chunk` with Phonelib and stores the result
      # in `number`.
      def parse_number
        @number = Phonelib.parse(chunk.gsub(/\D/, ''), @hint)
      end

      # National representation of the parsed number.
      def to_s
        number && number.national
      end

      # International representation of the parsed number.
      def rfc
        number && number.international
      end

      class << self
        # http://rubular.com/r/tEHB6KcZzk
        def regex
          /(?:^|\s)((?:\+\(?\d{1,3}\W)?[\._\-\/\s]*\(?\s*?\d{2,3}\s*?\)?[\._\-\/\s]*\d{3,5}[\._\-\/\s]*\d{4,5})(?:\s|$)/
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
module Ubi
  module Memoria
    # A site, url and title?
    class Site < Base
      #
      # Prefix http:// if there isn't one defined
      #
      # Only a leading "http://" or "https://" (any case) counts as a
      # scheme; "http" appearing elsewhere in the value does not.
      def format
        value =~ %r{\Ahttps?://}i ? value : "http://#{value}"
      end

      class << self
        #
        # Regex only for *.tld
        def regex
          # %r{https?://((?:\w+[\./]?)+)(?:/|\.)}
          # (?:\??)[a-zA-Z0-9\-\._\?\,\'\/\\\+&%\$#\=~]+
          # %r{(?:https?\://)?(?:www\.)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}}
          # ((?:https\:\/\/)|(?:http\:\/\/)|(?:www\.))?([a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(?:\??)[a-zA-Z0-9\-\._\?\,\'\/\\\+&%\$#\=~]+)
          # http://www.regexr.com/3bkne
          /([(https?):\/\/(www\.)?a-zA-Z0-9@:%\._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&\/\/?=]*))/
        end

        def key
          :site
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module Ubi
  module Memoria
    # Social account
    class Social < Base
      class << self
        # Known social applications and their host names.
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        # Matches an http(s) URL on any host listed in `apps` (optionally
        # behind subdomains) and captures "host/rest-of-line".
        def regex
          # Regexp.union escapes the dots in the host names.
          hosts = Regexp.union(apps.values)
          %r{https?://(?:\w+\.)*(#{hosts}/.*)}
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module Ubi
  module Memoria
    # Whoami
    class Who < Base
      class << self
        # Known applications and their host names.
        def apps
          {
            twitter: 'twitter.com',
            facebook: 'facebook.com',
            foursquare: 'foursquare.com'
          }
        end

        # Matches an http(s) URL on any host listed in `apps` (optionally
        # behind subdomains) and captures "host/rest-of-line".
        def regex
          # Regexp.union escapes the dots in the host names.
          hosts = Regexp.union(apps.values)
          %r{https?://(?:\w+\.)*(#{hosts}/.*)}
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
module Ubi
  #
  # Thema -> subject, matter, case
  #
  # Holds the name being researched, the URLs to crawl and a cache of
  # what was found, keyed by memoria class.
  #
  class Thema
    include ActiveModel::AttributeMethods
    include ActiveModel::Serialization
    include ActiveModel::Dirty

    attr_accessor :name, :urls, :opts, :ascii, :clean

    # name - subject name.
    # urls - URLs used to build `araneas`.
    # opts - initial values; each memoria is looked up by class first and
    #        then by its `key` symbol.
    def initialize(name, urls = [], opts = {})
      @name = name
      @urls = urls
      @opts = opts
      @cache = Ubi.memorias.each_with_object({}) do |memoria, cache|
        cache[memoria] = opts[memoria] || opts[memoria.key]
      end
      reduce_names
    end

    # One Aranea per URL, built lazily and memoized.
    def araneas
      @araneas ||= urls.map { |u| Aranea.new(self, u) }
    end

    # Derives the search-friendly variants of `name`.
    # NOTE(review): `ascii` is only downcased here, no transliteration
    # happens -- confirm whether that is intended.
    def reduce_names
      @ascii = name.mb_chars.downcase
      @downcase = name.mb_chars.downcase
      @clean = @downcase.gsub(/\W/, ' ')
    end

    # Defines one lazily initialized list reader per memoria known when
    # this class body is evaluated (e.g. `addresses`).
    Ubi.memorias.each do |memoria|
      # `plural` may be a String or a Symbol; interpolation handles both,
      # whereas '@' + Symbol raises TypeError.
      ivar = "@#{memoria.plural}"
      define_method memoria.plural do
        instance_variable_get(ivar) || instance_variable_set(ivar, [])
      end
    end

    # Cached value for a memoria, given either the memoria class or its
    # `key`. Returns nil when nothing matches.
    def [](arg)
      return @cache[arg] if @cache.key?(arg)
      memoria = Ubi.memorias.find { |m| m.key.to_s == arg.to_s }
      memoria && @cache[memoria]
    end

    # Prints the subject followed by the cached value of every memoria.
    def spec
      puts self
      Ubi.memorias.each do |memoria|
        print Paint[memoria.name, :black]
        # The cache is keyed by class, so look it up by class.
        puts self[memoria]
      end
    end

    # Instantiates consultor class `a` for this thema and parses its
    # `datum` with every memoria, replacing the cached matches.
    def try_consultor(a)
      a = a.new(self)
      Ubi.memorias.each do |m|
        puts Paint["Trying to find #{m} in #{a.class}", :green]
        @cache[m] = matches = m.parse(a.datum)
        puts matches if matches && !matches.empty?
      end
    end

    def to_s
      name
    end
  end
end