ubi 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +11 -0
  5. data/Guardfile +16 -0
  6. data/MIT-LICENSE +19 -0
  7. data/README.md +57 -0
  8. data/Rakefile +9 -0
  9. data/bin/ubi +10 -0
  10. data/lib/ubi.rb +37 -0
  11. data/lib/ubi/aranea.rb +42 -0
  12. data/lib/ubi/artifex.rb +28 -0
  13. data/lib/ubi/consultor.rb +61 -0
  14. data/lib/ubi/consultores/bing.rb +26 -0
  15. data/lib/ubi/consultores/duckduckgo.rb +26 -0
  16. data/lib/ubi/consultores/facebook.rb +6 -0
  17. data/lib/ubi/consultores/foursquare.rb +6 -0
  18. data/lib/ubi/consultores/google.rb +24 -0
  19. data/lib/ubi/consultores/linkedin.rb +0 -0
  20. data/lib/ubi/consultores/twitter.rb +6 -0
  21. data/lib/ubi/consultores/wikipedia.rb +6 -0
  22. data/lib/ubi/consultores/yahoo.rb +26 -0
  23. data/lib/ubi/datum.rb +43 -0
  24. data/lib/ubi/impero.rb +20 -0
  25. data/lib/ubi/memoria.rb +72 -0
  26. data/lib/ubi/memorias/address.rb +71 -0
  27. data/lib/ubi/memorias/document.rb +50 -0
  28. data/lib/ubi/memorias/email.rb +19 -0
  29. data/lib/ubi/memorias/phone.rb +33 -0
  30. data/lib/ubi/memorias/site.rb +29 -0
  31. data/lib/ubi/memorias/social.rb +20 -0
  32. data/lib/ubi/memorias/who.rb +20 -0
  33. data/lib/ubi/thema.rb +62 -0
  34. data/lib/ubi/version.rb +4 -0
  35. data/spec/fixtures/email.txt +5 -0
  36. data/spec/fixtures/mobile.txt +17 -0
  37. data/spec/fixtures/page.txt +21 -0
  38. data/spec/fixtures/phone.txt +17 -0
  39. data/spec/fixtures/site.txt +21 -0
  40. data/spec/spec_helper.rb +40 -0
  41. data/spec/ubi/aranea_spec.rb +19 -0
  42. data/spec/ubi/artifex_spec.rb +4 -0
  43. data/spec/ubi/memorias/address_spec.rb +56 -0
  44. data/spec/ubi/memorias/document_spec.rb +48 -0
  45. data/spec/ubi/memorias/email_spec.rb +59 -0
  46. data/spec/ubi/memorias/phone_spec.rb +79 -0
  47. data/spec/ubi/memorias/site_spec.rb +82 -0
  48. data/spec/ubi/thema_spec.rb +33 -0
  49. data/ubi.gemspec +39 -0
  50. metadata +232 -0
@@ -0,0 +1,4 @@
1
+ # :nodoc:
2
+ module Ubi # top-level namespace of the gem
3
+ VERSION = '0.0.3' # version string of this release
4
+ end
@@ -0,0 +1,5 @@
1
+ johh@gmail.com
2
+ 1112345678 johh@gmail.com
3
+ johh@gmail.com 11 12345678
4
+ mail: johh@gmail.com
5
+ mail:johh@gmail.com
@@ -0,0 +1,17 @@
1
+ 11998145678
2
+ 11 998145678
3
+ 11-998145678
4
+ 11.998145678
5
+ 11.99814.5678
6
+ 11 99814 5678
7
+ 11-99814-5678
8
+ (11)998145678
9
+ (11)99814-5678
10
+ (11) 99814-5678
11
+ ( 11) 99814-5678
12
+ ( 11 ) 99814-5678
13
+ ( 11 ) 99814 -5678
14
+ ( 11 ) 99814 - 5678
15
+ (11)99814-5678 bla
16
+ (11)99814-5678 bla 55
17
+ bla bla (11)99814-5678 bla 55
@@ -0,0 +1,21 @@
1
+ Faça sua parte
2
+ Você ou sua empresa podem contribuir de diversas formas para o futuro das crianças e jovens do Grupo Outono. Confira o projeto que mais se encaixa no seu perfil.
3
+
4
+ Como contribuir
5
+ Revista Outono
6
+ Conheça a edição 2014 da Revista Outono.
7
+
8
+ Quem atendemos?
9
+
10
+ Mais de 500 meninas e adolescentes e suas famílias;
11
+ Mais de 2000 moradores da comunidade do Jardim São Marcos e região;
12
+ Mais de 20 escolas da cidade de Campinas.
13
+
14
+ Telefone
15
+
16
+ Entre em contato conosco através do telefone
17
+ (19) 3857-8001
18
+
19
+ R. Manís Augusto Pucci, 30 – Jardim São Marcos, Campinas – SP, 13082-210
20
+ Encontre o Grupo Outono no Foogle Maps
21
+ (19) 3857.8001
@@ -0,0 +1,17 @@
1
+ 1112345678
2
+ 11 12345678
3
+ 11-12345678
4
+ 11.12345678
5
+ 11.1234.5678
6
+ 11 1234 5678
7
+ 11-1234-5678
8
+ (11)12345678
9
+ (11)1234-5678
10
+ (11) 1234-5678
11
+ ( 11) 1234-5678
12
+ ( 11 ) 1234-5678
13
+ ( 11 ) 1234 -5678
14
+ ( 11 ) 1234 - 5678
15
+ (11)1234-5678 bla
16
+ (11)1234-5678 bla 55
17
+ bla bla (11)1234-5678 bla 55
@@ -0,0 +1,21 @@
1
+ Welcome to RegExr 0.3b, an intuitive tool for learning, writing, and testing Regular Expressions. Key features include:
2
+
3
+ http://jdjd:senha@notabe.com
4
+ http://google.com/
5
+ irc://irc.freenode.net/git
6
+ https://google.com/, http://google.com/.
7
+ http://en.wikipedia.org/wiki/Git_(software)
8
+ Rubular (http://rubular.com/)
9
+ Another [http://rock.com]
10
+ and just for fun. {http://rocker.com}
11
+ googls.com
12
+ fof~fs.com?rock=jdjd&jba=jdj
13
+ (fofjs.com)
14
+ [jobsla.com]
15
+ ( ocu.dabund)
16
+ (jdjd.com )
17
+
18
+ * search Community expressions and add your own
19
+ * create Share Links to send your expressions to co-workers or link to them on Twitter or your blog [ex. http://RegExr.com?2rjl6 http://t.co/zhdlqoekdDF
20
+
21
+ Built by gskinner.com with Spelling Plus Library for text highlighting [gskinner.com/products/spl].
@@ -0,0 +1,40 @@
1
+ # Spec bootstrap: loads test dependencies, configures VCR/WebMock and RSpec, and defines fixture helpers.
2
+ require 'paint'
3
+ require 'pry'
4
+ require 'vcr'
5
+ require 'webmock/rspec'
6
+
7
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) # make this file's directory requirable
8
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) # make the sibling lib/ directory requirable
9
+
10
+ VCR.configure do |c|
11
+ c.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes" # cassettes directory next to this file
12
+ c.hook_into :webmock # VCR hooks into HTTP through WebMock
13
+ c.configure_rspec_metadata! # presumably lets examples opt in via :vcr metadata — see VCR docs
14
+ end
15
+
16
+ require 'ubi'
17
+ include Ubi # NOTE(review): top-level include; presumably so specs can name Thema/Aranea/Memoria without the Ubi:: prefix
18
+
19
+ FIXTURE_PATH = File.join(File.dirname(__FILE__), 'fixtures') # fixtures directory next to this file
20
+
21
+ def load_fixture(name) # returns the contents of FIXTURE_PATH/<name> as a String
22
+ File.read(File.join(FIXTURE_PATH, name))
23
+ end
24
+
25
+ if ENV['CI'] # coverage reporting is enabled only when the CI environment variable is set
26
+ require 'coveralls'
27
+ Coveralls.wear!
28
+ end
29
+
30
+ RSpec.configure do |config|
31
+ config.order = 'random' # run examples in random order
32
+ # config.around(:each) do |example|
33
+ # t = Time.now
34
+ # print Paint[example.metadata[:full_description], '#999']
35
+ # # _, vcr = example.metadata[:full_description].split('VCR ')
36
+ # # VCR.use_cassette(vcr) { example.run }
37
+ # puts Paint[" [#{Time.now - t}s]", :black]
38
+ # end
39
+ # config.before(:each) { Polipus::SignalHandler.disable }
40
+ end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
+ describe Aranea do # the examples obtain Aranea objects through Thema#araneas
4
+ it 'should delegate name to thema' do
5
+ aranea = Thema.new('Rock', ['rock.com']).araneas.first # thema built with the name 'Rock' and one url
6
+ expect(aranea.name).to eq('Rock')
7
+ end
8
+
9
+ it 'should use only one url' do
10
+ aranea = Thema.new('Rock', ['r1.com', 'r2.com']).araneas.first # two urls given; the first aranea should carry only 'r1.com'
11
+ expect(aranea.url).to eq('r1.com')
12
+ end
13
+
14
+ # it 'should crawl a domain VCR ruby-lang', :vcr do
15
+ # ruby = Thema.new('fire', ['http://fireho.com'])
16
+ # ar = ruby.araneas.first
17
+ # expect(ar.datum).to match(/oi/)
18
+ # end
19
+ end
@@ -0,0 +1,4 @@
1
+ require 'spec_helper'
2
+
3
+ describe Artifex do # NOTE(review): placeholder group — no examples are defined yet
4
+ end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+
3
+ describe Memoria::Address do
4
+ [ # input variants that are all expected to parse to the same single address
5
+ 'R Bahia,55 - Una/BR - 12345-678',
6
+ 'R Bahia,55 - Una/BR - 12345- 678',
7
+ 'R Bahia, 55 - Una/BR - 12345-678',
8
+ 'R Bahia, 55 - Una/BR - 12345-678', # NOTE(review): reads the same as the previous entry here — presumably differs only in whitespace; confirm
9
+ 'R Bahia, 55 / Una/BR - 12345-678',
10
+ 'R Bahia, 55 \n Una/BR \n 12345-678', # NOTE(review): single-quoted, so \n is a literal backslash + n, not a newline — confirm intent
11
+ 'R Bahia, 55, Una/BR - 12345-678'
12
+ ].each do |chunk|
13
+ describe "simple delimited `#{chunk}`" do
14
+ subject { Memoria::Address.parse(chunk) } # parse is expected to return an Array of Memoria::Address
15
+
16
+ it { is_expected.to be_an Array }
17
+ it { expect(subject.size).to eq(1) }
18
+ it { is_expected.to include(Memoria::Address) }
19
+
20
+ describe 'instance' do
21
+ let(:addr) { subject.first } # the single parsed address
22
+
23
+ it { expect(addr.zip.to_s).to eq('12345-678') }
24
+ it { expect(addr.number.to_s).to eq('55') }
25
+ it { expect(addr.region.to_s).to eq('BR') }
26
+
27
+ it 'should split into parts' do
28
+ expect(addr.parts).to eq(['R Bahia', '55', 'Una', 'BR', '12345-678'])
29
+ end
30
+
31
+ it 'should split into words' do
32
+ expect(addr.words).to eq([%w(R Bahia), ['55'], ['Una'], ['BR'], ['12345-678']])
33
+ end
34
+ end
35
+ end
36
+ end
37
+
38
+ describe 'sanitizing' do # NOTE(review): some inputs below read as exact duplicates here — presumably they differ only in whitespace; confirm
39
+ subject { Memoria::Address } # sanitize is called on the class itself
40
+ it { expect(subject.sanitize('Fu Ba')).to eq('Fu Ba') }
41
+ it { expect(subject.sanitize('Fu Ba')).to eq('Fu Ba') }
42
+ it { expect(subject.sanitize('Fu Ba')).to eq('Fu Ba') }
43
+ it { expect(subject.sanitize('Fu, Ba')).to eq('Fu,Ba') }
44
+ it { expect(subject.sanitize('Fu ,Ba')).to eq('Fu,Ba') }
45
+ it { expect(subject.sanitize('Fu , Ba')).to eq('Fu,Ba') }
46
+ it { expect(subject.sanitize('Fu , Ba')).to eq('Fu,Ba') }
47
+ it { expect(subject.sanitize('Fu , Ba')).to eq('Fu,Ba') }
48
+ it { expect(subject.sanitize('123454-789')).to eq('123454-789') }
49
+ it { expect(subject.sanitize('123454 -789')).to eq('123454-789') }
50
+ it { expect(subject.sanitize('123454- 789')).to eq('123454-789') }
51
+ it { expect(subject.sanitize('123454 - 789')).to eq('123454-789') }
52
+ it { expect(subject.sanitize('foo,55 - Ubi')).to eq('foo,55-Ubi') }
53
+ it { expect(subject.sanitize('foo,55 - Ubi')).to eq('foo,55-Ubi') }
54
+ it { expect(subject.sanitize('BR - 12345- 678')).to eq('BR-12345-678') }
55
+ end
56
+ end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
+ describe Memoria::Document do
4
+ describe 'simple test' do
5
+ subject { Memoria::Document.parse('bla bla 5 12.345.678/0001-90 79 so') } # document number embedded in surrounding text
6
+
7
+ it { is_expected.to include(Memoria::Document) }
8
+ it { is_expected.to be_an Array }
9
+ end
10
+
11
+ describe 'brazilian CNPJ' do
12
+ def parse(document) # helper: string form of the first parsed document
13
+ Memoria::Document.parse(document).first.to_s
14
+ end
15
+
16
+ let(:parsed) { '12.345.678/0001-90' } # formatting expected for every input variant below
17
+
18
+ it { expect(parse('12345678000190')).to eq(parsed) }
19
+ it { expect(parse('123456780001-90')).to eq(parsed) }
20
+ it { expect(parse('12345678/0001-90')).to eq(parsed) }
21
+ it { expect(parse('12345.678/0001-90')).to eq(parsed) }
22
+ it { expect(parse('12.345.678/0001-90')).to eq(parsed) }
23
+ end
24
+
25
+ describe 'not documents' do # short numbers, an email and phone-like strings must yield no document
26
+ %w(
27
+ 1234
28
+ 12345
29
+ 123456
30
+ 1234567
31
+ 123@email.com
32
+ 11982345678
33
+ 11 982345678
34
+ 11-982345678
35
+ 11.982345678
36
+ 11-98234-5678
37
+ 11 982345678
38
+ (11)982345678
39
+ (11)98234-5678
40
+ (11) 98234-5678
41
+ (11) 98234.5678
42
+ ).each do |bad_document| # NOTE(review): %w splits on whitespace, so the entries written with a space above become two separate words each
43
+ it "should not parse '#{bad_document}'" do
44
+ expect(Memoria::Document.parse(bad_document).first).to be_nil
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,59 @@
1
+ require 'spec_helper'
2
+
3
+ describe Memoria::Email do
4
+ describe 'simple test' do
5
+ subject { Memoria::Email.parse('bla bla me@somesite.com') } # address embedded in surrounding text
6
+
7
+ it { is_expected.to include(Memoria::Email) }
8
+ it { is_expected.to be_an Array }
9
+ end
10
+
11
+ describe 'valid' do
12
+ %w(
13
+ foo@foo.com
14
+ foo@foo.org
15
+ foo@foo.net
16
+ foo_bar@foo.net
17
+ foo-bar@foo.net
18
+ foo.bar@foo.net
19
+ foo.bar9@foo.net
20
+ foo.bar@99foo.net
21
+ zumbi@land.com.br
22
+ zumbi@land.org.br
23
+ zumbi@land.net.br
24
+ zum@dom.land.co.uk
25
+ zum@ad.dom.land.co.tk
26
+ ).each do |good_mail| # one example per address; each must round-trip unchanged as the only result
27
+ it "should correctly parse '#{good_mail}'" do
28
+ res = Memoria::Email.parse(good_mail)
29
+ expect(res.first.to_s).to eq(good_mail)
30
+ expect(res.size).to eq(1)
31
+ end
32
+ end
33
+ end
34
+
35
+ describe 'invalid' do
36
+ %w(
37
+ @foo
38
+ foo@foo
39
+ @foo.org
40
+ zumbi@.com
41
+ foo@@foo.com
42
+ zum.@good.com
43
+ @11 53 2355
44
+ @11532355
45
+ ).each do |bad_mail| # NOTE(review): %w splits on whitespace, so '@11 53 2355' above is tested as '@11', '53' and '2355'
46
+ it "should not parse '#{bad_mail}'" do
47
+ expect(Memoria::Email.parse(bad_mail).first).to be_nil
48
+ end
49
+ end
50
+ end
51
+
52
+ describe 'parsed emails txt' do
53
+ load_fixture('email.txt').each_line do |l| # one example per fixture line; each_line keeps the trailing newline in l
54
+ it "Should parse email #{l}" do
55
+ expect(Memoria::Email.parse(l).first.to_s).to eq('johh@gmail.com')
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,79 @@
1
+ require 'spec_helper'
2
+
3
+ describe Memoria::Phone do
4
+ describe 'simple test' do
5
+ subject { Memoria::Phone.new('+551112345678') } # built directly with .new rather than .parse
6
+
7
+ it { is_expected.to be_a Memoria::Phone }
8
+ end
9
+
10
+ describe 'parsed landlines' do
11
+ def parse(phone) # helper: #rfc of the first parsed phone
12
+ Memoria::Phone.parse(phone).first.rfc
13
+ end
14
+
15
+ let(:parsed) { '+1112345678' } # value expected for every line of phone.txt
16
+
17
+ load_fixture('phone.txt').each_line do |l| # one example per fixture line; each_line keeps the trailing newline in l
18
+ it "Should parse phone #{l}" do
19
+ expect(parse(l)).to eq(parsed)
20
+ end
21
+ end
22
+ end
23
+
24
+ describe 'parsed mobiles txt' do
25
+ def parse(phone) # NOTE(review): same helper as in 'parsed landlines'
26
+ Memoria::Phone.parse(phone).first.rfc
27
+ end
28
+
29
+ let(:parsed) { '+11998145678' } # value expected for every line of mobile.txt
30
+
31
+ load_fixture('mobile.txt').each_line do |l| # one example per fixture line; each_line keeps the trailing newline in l
32
+ it "Should parse phone #{l}" do
33
+ expect(parse(l)).to eq(parsed)
34
+ end
35
+ end
36
+ end
37
+
38
+ describe 'parsed mobiles' do
39
+ def parse(phone) # NOTE(review): same helper as in 'parsed landlines'
40
+ Memoria::Phone.parse(phone).first.rfc
41
+ end
42
+
43
+ let(:parsed) { '+11982345678' } # value expected for every formatting variant below
44
+
45
+ it { expect(parse('11982345678')).to eq(parsed) }
46
+ it { expect(parse('11 982345678')).to eq(parsed) }
47
+ it { expect(parse('11-982345678')).to eq(parsed) }
48
+ it { expect(parse('11.982345678')).to eq(parsed) }
49
+ it { expect(parse('11-98234-5678')).to eq(parsed) }
50
+ it { expect(parse('11 982345678')).to eq(parsed) }
51
+ it { expect(parse('(11)982345678')).to eq(parsed) }
52
+ it { expect(parse('(11)98234-5678')).to eq(parsed) }
53
+ it { expect(parse('(11) 98234-5678')).to eq(parsed) }
54
+ it { expect(parse('(11) 98234.5678')).to eq(parsed) }
55
+ end
56
+
57
+ describe 'regex' do
58
+ subject(:regex) { Memoria::Phone.regex }
59
+ it { expect('(11) 98234.5678 547\n5475\n'.scan(regex).size).to eq(1) } # NOTE(review): single-quoted, so \n is a literal backslash + n, not a newline — confirm intent
60
+ end
61
+
62
+ describe 'invalid' do # short numbers, emails and document-style numbers must yield no phone
63
+ %w(
64
+ 1234
65
+ 12345
66
+ 123456
67
+ 1234567
68
+ 123@email.com
69
+ 12.345.678/0001-90
70
+ 12345678000190
71
+ 069.789.136-45
72
+ 123456@ema5678.com
73
+ ).each do |bad_phone|
74
+ it "should not parse '#{bad_phone}'" do
75
+ expect(Memoria::Phone.parse(bad_phone).first).to be_nil
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,82 @@
1
+ require 'spec_helper'
2
+
3
+ describe Memoria::Site do
4
+ describe 'simple test' do
5
+ subject { Memoria::Site.parse('bla bla me@somesite.com') } # NOTE(review): input is an email address; the 'http' examples below expect such strings to parse as sites
6
+
7
+ it { is_expected.to include(Memoria::Site) }
8
+ it { is_expected.to be_an Array }
9
+ end
10
+
11
+ describe 'parsed' do
12
+ def parse(site) # helper: string form of the first parsed site
13
+ Memoria::Site.parse(site).first.to_s
14
+ end
15
+
16
+ describe 'http' do
17
+ let(:parsed) { 'http://fubah.com' }
18
+
19
+ it { expect(parse('fubah.com')).to eq(parsed) } # scheme-less input is expected to gain http://
20
+ it { expect(parse('@fubah.com')).to eq('http://@fubah.com') }
21
+ it { expect(parse('fu@fubah.com')).to eq('http://fu@fubah.com') }
22
+ it { expect(parse('http://fubah.com')).to eq(parsed) }
23
+ it { expect(parse('http://fubah.com/56')).to eq(parsed + '/56') }
24
+ end
25
+
26
+ describe 'https' do
27
+ let(:parsed) { 'https://fubah.com' }
28
+
29
+ it { expect(parse('https://fubah.com')).to eq(parsed) }
30
+ it { expect(parse('https://fubah.com?56')).to eq(parsed + '?56') }
31
+ end
32
+
33
+ describe 'subdomain' do
34
+ let(:parsed) { 'http://www.fubah.com' }
35
+
36
+ it { expect(parse('www.fubah.com')).to eq(parsed) }
37
+ it { expect(parse('http://www.fubah.com')).to eq(parsed) }
38
+ it { expect(parse('http://www.fubah.com/f?56')).to eq(parsed + '/f?56') }
39
+ end
40
+ end
41
+
42
+ describe 'valid http' do
43
+ %w(
44
+ oo.com
45
+ oo.org
46
+ oo.net
47
+ foo.net
48
+ foo.net
49
+ foo.net
50
+ foo.net
51
+ 9foo.net
52
+ land.com.br
53
+ land.org.br
54
+ land.net.br
55
+ dom.land.co.uk
56
+ ad.dom.land.co.tk
57
+ http://foo.com
58
+ http://www.foo.com
59
+ ).each do |good_site| # NOTE(review): foo.net is listed four times above, giving four examples with the same description
60
+ it "should correctly parse '#{good_site}'" do
61
+ res = Memoria::Site.parse(good_site)
62
+ good_site = "http://#{good_site}" if good_site !~ /http/ # expected value: inputs not containing 'http' come back prefixed with http://
63
+ expect(res.first.to_s).to eq(good_site)
64
+ expect(res.size).to eq(1)
65
+ end
66
+ end
67
+ end
68
+
69
+ describe 'invalid' do
70
+ %w(
71
+ @foo
72
+ foo@foo
73
+ zumbi@.com
74
+ @11 53 2355
75
+ @11532355
76
+ ).each do |bad_site| # NOTE(review): %w splits on whitespace, so '@11 53 2355' above is tested as '@11', '53' and '2355'
77
+ it "should not parse '#{bad_site}'" do
78
+ expect(Memoria::Site.parse(bad_site).first).to be_nil
79
+ end
80
+ end
81
+ end
82
+ end