ubi 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +11 -0
  5. data/Guardfile +16 -0
  6. data/MIT-LICENSE +19 -0
  7. data/README.md +57 -0
  8. data/Rakefile +9 -0
  9. data/bin/ubi +10 -0
  10. data/lib/ubi.rb +37 -0
  11. data/lib/ubi/aranea.rb +42 -0
  12. data/lib/ubi/artifex.rb +28 -0
  13. data/lib/ubi/consultor.rb +61 -0
  14. data/lib/ubi/consultores/bing.rb +26 -0
  15. data/lib/ubi/consultores/duckduckgo.rb +26 -0
  16. data/lib/ubi/consultores/facebook.rb +6 -0
  17. data/lib/ubi/consultores/foursquare.rb +6 -0
  18. data/lib/ubi/consultores/google.rb +24 -0
  19. data/lib/ubi/consultores/linkedin.rb +0 -0
  20. data/lib/ubi/consultores/twitter.rb +6 -0
  21. data/lib/ubi/consultores/wikipedia.rb +6 -0
  22. data/lib/ubi/consultores/yahoo.rb +26 -0
  23. data/lib/ubi/datum.rb +43 -0
  24. data/lib/ubi/impero.rb +20 -0
  25. data/lib/ubi/memoria.rb +72 -0
  26. data/lib/ubi/memorias/address.rb +71 -0
  27. data/lib/ubi/memorias/document.rb +50 -0
  28. data/lib/ubi/memorias/email.rb +19 -0
  29. data/lib/ubi/memorias/phone.rb +33 -0
  30. data/lib/ubi/memorias/site.rb +29 -0
  31. data/lib/ubi/memorias/social.rb +20 -0
  32. data/lib/ubi/memorias/who.rb +20 -0
  33. data/lib/ubi/thema.rb +62 -0
  34. data/lib/ubi/version.rb +4 -0
  35. data/spec/fixtures/email.txt +5 -0
  36. data/spec/fixtures/mobile.txt +17 -0
  37. data/spec/fixtures/page.txt +21 -0
  38. data/spec/fixtures/phone.txt +17 -0
  39. data/spec/fixtures/site.txt +21 -0
  40. data/spec/spec_helper.rb +40 -0
  41. data/spec/ubi/aranea_spec.rb +19 -0
  42. data/spec/ubi/artifex_spec.rb +4 -0
  43. data/spec/ubi/memorias/address_spec.rb +56 -0
  44. data/spec/ubi/memorias/document_spec.rb +48 -0
  45. data/spec/ubi/memorias/email_spec.rb +59 -0
  46. data/spec/ubi/memorias/phone_spec.rb +79 -0
  47. data/spec/ubi/memorias/site_spec.rb +82 -0
  48. data/spec/ubi/thema_spec.rb +33 -0
  49. data/ubi.gemspec +39 -0
  50. metadata +232 -0
@@ -0,0 +1,4 @@
# :nodoc:
module Ubi
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = '0.0.3'.freeze
end
@@ -0,0 +1,5 @@
1
+ johh@gmail.com
2
+ 1112345678 johh@gmail.com
3
+ johh@gmail.com 11 12345678
4
+ mail: johh@gmail.com
5
+ mail:johh@gmail.com
@@ -0,0 +1,17 @@
1
+ 11998145678
2
+ 11 998145678
3
+ 11-998145678
4
+ 11.998145678
5
+ 11.99814.5678
6
+ 11 99814 5678
7
+ 11-99814-5678
8
+ (11)998145678
9
+ (11)99814-5678
10
+ (11) 99814-5678
11
+ ( 11) 99814-5678
12
+ ( 11 ) 99814-5678
13
+ ( 11 ) 99814 -5678
14
+ ( 11 ) 99814 - 5678
15
+ (11)99814-5678 bla
16
+ (11)99814-5678 bla 55
17
+ bla bla (11)99814-5678 bla 55
@@ -0,0 +1,21 @@
1
+ Faça sua parte
2
+ Você ou sua empresa podem contribuir de diversas formas para o futuro das crianças e jovens do Grupo Outono. Confira o projeto que mais se encaixa no seu perfil.
3
+
4
+ Como contribuir
5
+ Revista Outono
6
+ Conheça a edição 2014 da Revista Outono.
7
+
8
+ Quem atendemos?
9
+
10
+ Mais de 500 meninas e adolescentes e suas famílias;
11
+ Mais de 2000 moradores da comunidade do Jardim São Marcos e região;
12
+ Mais de 20 escolas da cidade de Campinas.
13
+
14
+ Telefone
15
+
16
+ Entre em contato conosco através do telefone
17
+ (19) 3857-8001
18
+
19
+ R. Manís Augusto Pucci, 30 – Jardim São Marcos, Campinas – SP, 13082-210
20
+ Encontre o Grupo Outono no Foogle Maps
21
+ (19) 3857.8001
@@ -0,0 +1,17 @@
1
+ 1112345678
2
+ 11 12345678
3
+ 11-12345678
4
+ 11.12345678
5
+ 11.1234.5678
6
+ 11 1234 5678
7
+ 11-1234-5678
8
+ (11)12345678
9
+ (11)1234-5678
10
+ (11) 1234-5678
11
+ ( 11) 1234-5678
12
+ ( 11 ) 1234-5678
13
+ ( 11 ) 1234 -5678
14
+ ( 11 ) 1234 - 5678
15
+ (11)1234-5678 bla
16
+ (11)1234-5678 bla 55
17
+ bla bla (11)1234-5678 bla 55
@@ -0,0 +1,21 @@
1
+ Welcome to RegExr 0.3b, an intuitive tool for learning, writing, and testing Regular Expressions. Key features include:
2
+
3
+ http://jdjd:senha@notabe.com
4
+ http://google.com/
5
+ irc://irc.freenode.net/git
6
+ https://google.com/, http://google.com/.
7
+ http://en.wikipedia.org/wiki/Git_(software)
8
+ Rubular (http://rubular.com/)
9
+ Another [http://rock.com]
10
+ and just for fun. {http://rocker.com}
11
+ googls.com
12
+ fof~fs.com?rock=jdjd&jba=jdj
13
+ (fofjs.com)
14
+ [jobsla.com]
15
+ ( ocu.dabund)
16
+ (jdjd.com )
17
+
18
+ * search Community expressions and add your own
19
+ * create Share Links to send your expressions to co-workers or link to them on Twitter or your blog [ex. http://RegExr.com?2rjl6 http://t.co/zhdlqoekdDF
20
+
21
+ Built by gskinner.com with Spelling Plus Library for text highlighting [gskinner.com/products/spl].
@@ -0,0 +1,40 @@
# Shared RSpec bootstrap: coverage, load paths, VCR/WebMock wiring and
# fixture helpers.

# Coverage must be started before the library is required; files loaded
# earlier than this point are not instrumented.
if ENV['CI']
  require 'coveralls'
  Coveralls.wear!
end

require 'paint'
require 'pry'
require 'vcr'
require 'webmock/rspec'

$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

VCR.configure do |c|
  c.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes"
  c.hook_into :webmock
  c.configure_rspec_metadata!
end

require 'ubi'
# Mixed into the top level so the specs can name Ubi's constants
# (Aranea, Memoria, Thema, ...) without the Ubi:: prefix.
include Ubi

# Directory holding the text fixtures, next to this file.
FIXTURE_PATH = File.join(File.dirname(__FILE__), 'fixtures')

# Reads a fixture file.
#
# @param name [String] file name relative to FIXTURE_PATH
# @return [String] the file contents
def load_fixture(name)
  File.read(File.join(FIXTURE_PATH, name))
end

RSpec.configure do |config|
  config.order = 'random'
  # config.around(:each) do |example|
  #   t = Time.now
  #   print Paint[example.metadata[:full_description], '#999']
  #   # _, vcr = example.metadata[:full_description].split('VCR ')
  #   # VCR.use_cassette(vcr) { example.run }
  #   puts Paint[" [#{Time.now - t}s]", :black]
  # end
  # config.before(:each) { Polipus::SignalHandler.disable }
end
@@ -0,0 +1,19 @@
require 'spec_helper'

# Each example builds its Aranea through Thema#araneas.
describe Aranea do
  it 'should delegate name to thema' do
    thema = Thema.new('Rock', ['rock.com'])
    spider = thema.araneas.first

    expect(spider.name).to eq('Rock')
  end

  it 'should use only one url' do
    thema = Thema.new('Rock', %w(r1.com r2.com))
    spider = thema.araneas.first

    expect(spider.url).to eq('r1.com')
  end

  # it 'should crawl a domain VCR ruby-lang', :vcr do
  #   ruby = Thema.new('fire', ['http://fireho.com'])
  #   ar = ruby.araneas.first
  #   expect(ar.datum).to match(/oi/)
  # end
end
@@ -0,0 +1,4 @@
require 'spec_helper'

# Placeholder group: no examples are defined for Artifex yet.
describe Artifex do
end
@@ -0,0 +1,56 @@
require 'spec_helper'

describe Memoria::Address do
  # One address written with different delimiters; every variant is expected
  # to parse to the same single result.
  # NOTE(review): some entries in the lists of this file are textually
  # identical; presumably they once differed only in whitespace - confirm.
  delimited_chunks = [
    'R Bahia,55 - Una/BR - 12345-678',
    'R Bahia,55 - Una/BR - 12345- 678',
    'R Bahia, 55 - Una/BR - 12345-678',
    'R Bahia, 55 - Una/BR - 12345-678',
    'R Bahia, 55 / Una/BR - 12345-678',
    # NOTE(review): single-quoted, so `\n` is a literal backslash followed by
    # `n`, not a newline - confirm this is the intended input.
    'R Bahia, 55 \n Una/BR \n 12345-678',
    'R Bahia, 55, Una/BR - 12345-678'
  ]

  delimited_chunks.each do |chunk|
    describe "simple delimited `#{chunk}`" do
      subject(:addresses) { Memoria::Address.parse(chunk) }

      it { is_expected.to be_an Array }
      it { expect(addresses.size).to eq(1) }
      it { is_expected.to include(Memoria::Address) }

      describe 'instance' do
        let(:addr) { addresses.first }

        it { expect(addr.zip.to_s).to eq('12345-678') }
        it { expect(addr.number.to_s).to eq('55') }
        it { expect(addr.region.to_s).to eq('BR') }

        it 'should split into parts' do
          expected_parts = ['R Bahia', '55', 'Una', 'BR', '12345-678']
          expect(addr.parts).to eq(expected_parts)
        end

        it 'should split into words' do
          expected_words = [%w(R Bahia), ['55'], ['Una'], ['BR'], ['12345-678']]
          expect(addr.words).to eq(expected_words)
        end
      end
    end
  end

  describe 'sanitizing' do
    subject { Memoria::Address }

    # [raw input, expected sanitized output]; kept as pairs (not a Hash) so
    # repeated inputs still produce one example each.
    [
      ['Fu Ba', 'Fu Ba'],
      ['Fu Ba', 'Fu Ba'],
      ['Fu Ba', 'Fu Ba'],
      ['Fu, Ba', 'Fu,Ba'],
      ['Fu ,Ba', 'Fu,Ba'],
      ['Fu , Ba', 'Fu,Ba'],
      ['Fu , Ba', 'Fu,Ba'],
      ['Fu , Ba', 'Fu,Ba'],
      ['123454-789', '123454-789'],
      ['123454 -789', '123454-789'],
      ['123454- 789', '123454-789'],
      ['123454 - 789', '123454-789'],
      ['foo,55 - Ubi', 'foo,55-Ubi'],
      ['foo,55 - Ubi', 'foo,55-Ubi'],
      ['BR - 12345- 678', 'BR-12345-678']
    ].each do |raw, clean|
      it { expect(subject.sanitize(raw)).to eq(clean) }
    end
  end
end
@@ -0,0 +1,48 @@
require 'spec_helper'

describe Memoria::Document do
  describe 'simple test' do
    subject { Memoria::Document.parse('bla bla 5 12.345.678/0001-90 79 so') }

    it { is_expected.to include(Memoria::Document) }
    it { is_expected.to be_an Array }
  end

  describe 'brazilian CNPJ' do
    # Returns the first parsed document rendered as a String.
    def parse(document)
      Memoria::Document.parse(document).first.to_s
    end

    let(:parsed) { '12.345.678/0001-90' }

    # The same CNPJ with progressively more punctuation.
    %w(
      12345678000190
      123456780001-90
      12345678/0001-90
      12345.678/0001-90
      12.345.678/0001-90
    ).each do |raw|
      it { expect(parse(raw)).to eq(parsed) }
    end
  end

  describe 'not documents' do
    # Explicit strings instead of %w(...): several inputs contain spaces and
    # a %w literal would split each of them into separate words, so the
    # full input would never be exercised.
    [
      '1234',
      '12345',
      '123456',
      '1234567',
      '123@email.com',
      '11982345678',
      '11 982345678',
      '11-982345678',
      '11.982345678',
      '11-98234-5678',
      '11 982345678',
      '(11)982345678',
      '(11)98234-5678',
      '(11) 98234-5678',
      '(11) 98234.5678'
    ].each do |bad_document|
      it "should not parse '#{bad_document}'" do
        expect(Memoria::Document.parse(bad_document).first).to be_nil
      end
    end
  end
end
@@ -0,0 +1,59 @@
require 'spec_helper'

describe Memoria::Email do
  describe 'simple test' do
    subject { Memoria::Email.parse('bla bla me@somesite.com') }

    it { is_expected.to include(Memoria::Email) }
    it { is_expected.to be_an Array }
  end

  describe 'valid' do
    %w(
      foo@foo.com
      foo@foo.org
      foo@foo.net
      foo_bar@foo.net
      foo-bar@foo.net
      foo.bar@foo.net
      foo.bar9@foo.net
      foo.bar@99foo.net
      zumbi@land.com.br
      zumbi@land.org.br
      zumbi@land.net.br
      zum@dom.land.co.uk
      zum@ad.dom.land.co.tk
    ).each do |good_mail|
      it "should correctly parse '#{good_mail}'" do
        res = Memoria::Email.parse(good_mail)
        expect(res.first.to_s).to eq(good_mail)
        expect(res.size).to eq(1)
      end
    end
  end

  describe 'invalid' do
    # Explicit strings instead of %w(...): '@11 53 2355' contains spaces and
    # a %w literal would turn it into three separate words.
    [
      '@foo',
      'foo@foo',
      '@foo.org',
      'zumbi@.com',
      'foo@@foo.com',
      'zum.@good.com',
      '@11 53 2355',
      '@11532355'
    ].each do |bad_mail|
      it "should not parse '#{bad_mail}'" do
        expect(Memoria::Email.parse(bad_mail).first).to be_nil
      end
    end
  end

  describe 'parsed emails txt' do
    # One example per fixture line; every line is expected to yield the
    # same address.
    load_fixture('email.txt').each_line do |l|
      it "Should parse email #{l}" do
        expect(Memoria::Email.parse(l).first.to_s).to eq('johh@gmail.com')
      end
    end
  end
end
@@ -0,0 +1,79 @@
require 'spec_helper'

describe Memoria::Phone do
  # Parses +phone+ and returns the `rfc` value of the first match.
  # Defined once here and inherited by the nested groups.
  def parse(phone)
    Memoria::Phone.parse(phone).first.rfc
  end

  describe 'simple test' do
    subject { Memoria::Phone.new('+551112345678') }

    it { is_expected.to be_a Memoria::Phone }
  end

  describe 'parsed landlines' do
    let(:parsed) { '+1112345678' }

    # One example per line of the fixture file.
    load_fixture('phone.txt').each_line do |line|
      it "Should parse phone #{line}" do
        expect(parse(line)).to eq(parsed)
      end
    end
  end

  describe 'parsed mobiles txt' do
    let(:parsed) { '+11998145678' }

    load_fixture('mobile.txt').each_line do |line|
      it "Should parse phone #{line}" do
        expect(parse(line)).to eq(parsed)
      end
    end
  end

  describe 'parsed mobiles' do
    let(:parsed) { '+11982345678' }

    # Different spellings of one mobile number.
    [
      '11982345678',
      '11 982345678',
      '11-982345678',
      '11.982345678',
      '11-98234-5678',
      '11 982345678',
      '(11)982345678',
      '(11)98234-5678',
      '(11) 98234-5678',
      '(11) 98234.5678'
    ].each do |raw|
      it { expect(parse(raw)).to eq(parsed) }
    end
  end

  describe 'regex' do
    subject(:regex) { Memoria::Phone.regex }

    # NOTE(review): the literal is single-quoted, so each `\n` is a backslash
    # followed by `n` rather than a newline - confirm this is intended.
    it do
      matches = '(11) 98234.5678 547\n5475\n'.scan(regex)
      expect(matches.size).to eq(1)
    end
  end

  describe 'invalid' do
    %w(
      1234
      12345
      123456
      1234567
      123@email.com
      12.345.678/0001-90
      12345678000190
      069.789.136-45
      123456@ema5678.com
    ).each do |bad_phone|
      it "should not parse '#{bad_phone}'" do
        expect(Memoria::Phone.parse(bad_phone).first).to be_nil
      end
    end
  end
end
@@ -0,0 +1,82 @@
require 'spec_helper'

describe Memoria::Site do
  describe 'simple test' do
    subject { Memoria::Site.parse('bla bla me@somesite.com') }

    it { is_expected.to include(Memoria::Site) }
    it { is_expected.to be_an Array }
  end

  describe 'parsed' do
    # Returns the first parsed site rendered as a String.
    def parse(site)
      Memoria::Site.parse(site).first.to_s
    end

    describe 'http' do
      let(:parsed) { 'http://fubah.com' }

      it { expect(parse('fubah.com')).to eq(parsed) }
      it { expect(parse('@fubah.com')).to eq('http://@fubah.com') }
      it { expect(parse('fu@fubah.com')).to eq('http://fu@fubah.com') }
      it { expect(parse('http://fubah.com')).to eq(parsed) }
      it { expect(parse('http://fubah.com/56')).to eq(parsed + '/56') }
    end

    describe 'https' do
      let(:parsed) { 'https://fubah.com' }

      it { expect(parse('https://fubah.com')).to eq(parsed) }
      it { expect(parse('https://fubah.com?56')).to eq(parsed + '?56') }
    end

    describe 'subdomain' do
      let(:parsed) { 'http://www.fubah.com' }

      it { expect(parse('www.fubah.com')).to eq(parsed) }
      it { expect(parse('http://www.fubah.com')).to eq(parsed) }
      it { expect(parse('http://www.fubah.com/f?56')).to eq(parsed + '/f?56') }
    end
  end

  describe 'valid http' do
    %w(
      oo.com
      oo.org
      oo.net
      foo.net
      foo.net
      foo.net
      foo.net
      9foo.net
      land.com.br
      land.org.br
      land.net.br
      dom.land.co.uk
      ad.dom.land.co.tk
      http://foo.com
      http://www.foo.com
    ).each do |good_site|
      it "should correctly parse '#{good_site}'" do
        res = Memoria::Site.parse(good_site)
        # Inputs without a scheme are expected back with http:// prepended;
        # a separate local avoids reassigning the block parameter.
        expected = good_site !~ /http/ ? "http://#{good_site}" : good_site
        expect(res.first.to_s).to eq(expected)
        expect(res.size).to eq(1)
      end
    end
  end

  describe 'invalid' do
    # Explicit strings instead of %w(...): '@11 53 2355' contains spaces and
    # a %w literal would turn it into three separate words.
    [
      '@foo',
      'foo@foo',
      'zumbi@.com',
      '@11 53 2355',
      '@11532355'
    ].each do |bad_site|
      it "should not parse '#{bad_site}'" do
        expect(Memoria::Site.parse(bad_site).first).to be_nil
      end
    end
  end
end