determine 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems from the canonical RubyGems index, over TLS so downloads cannot
# be tampered with in transit. Only one global source is declared: the legacy
# gems.github.com host is no longer needed (nokogiri is published on
# rubygems.org) and multiple global sources make resolution ambiguous.
source 'https://rubygems.org'

gem 'nokogiri'
data/Gemfile.lock ADDED
@@ -0,0 +1,11 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ remote: http://gems.github.com/
4
+ specs:
5
+ nokogiri (1.5.5)
6
+
7
+ PLATFORMS
8
+ ruby
9
+
10
+ DEPENDENCIES
11
+ nokogiri
data/README.md ADDED
@@ -0,0 +1 @@
1
+ WIP - v0.1.1
data/determine.gemspec ADDED
@@ -0,0 +1,17 @@
1
# Gem specification for the determine gem.
#
# lib/ is put on the load path (once) so that the version constant can be
# required before the gem itself is installed. The path is expanded relative to
# this file so it is absolute and does not pass through the gemspec file name.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'determine/version'

Gem::Specification.new do |s|
  s.name        = 'determine'
  s.version     = ::Determine::VERSION
  s.authors     = ['Tyler Smith']
  s.homepage    = 'https://github.com/tyler-smith/determine'
  s.email       = 'tylersmith.me@gmail.com'
  s.summary     = 'Provides tools for creating heuristic filters for text'
  s.description = 'Allows you to build classes that brute force determinations from text'

  # Package every file tracked by git.
  s.files         = `git ls-files`.split("\n")
  s.require_paths = ['lib']

  s.add_runtime_dependency 'nokogiri'
end
data/lib/determine.rb ADDED
@@ -0,0 +1,11 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
# Create our root module up front so the files required below can simply
# reopen it.
module Determine
  # Empty namespace; presumably reserved for the library's exception classes.
  module Exceptions; end
end

# Add this directory to the load path (as an absolute path, and only once),
# then require every Ruby file below determine/. The file list is sorted so the
# load order is the same on every platform; unsorted glob results depend on the
# filesystem on older Rubies.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
Dir[File.join(lib_dir, 'determine', '**', '*.rb')].sort.each { |file| require file }
@@ -0,0 +1,19 @@
1
module Determine
  # Base class for one kind of determination. Subclasses supply handler
  # methods; #determine runs them in order until one yields a truthy value.
  class Determination
    # Ordered list of handler method names to try. Defaults to a single
    # handler called 'default'.
    def handlers
      @handlers ||= %w[default]
    end

    # Runs each handler against +page+ (plus any extra +args+) and returns the
    # first truthy result. Handler names this object does not publicly respond
    # to are skipped. Returns nil when no handler produces a determination.
    # TODO: in future extend this to allow multiple determinations
    def determine(page, *args)
      handlers.each do |name|
        next unless respond_to?(name)

        outcome = send(name, page, *args)
        return outcome if outcome
      end

      nil
    end
  end
end
@@ -0,0 +1,49 @@
1
module Determine
  # Runs registered determinations against a single web page.
  #
  # Subclasses declare what they can determine with the class-level
  # +determination+ call; instances are built from a URL (or host name) and
  # answer #determine for any registered name.
  class Determiner
    class << self
      # Registry of the determinations this class can make, keyed by Symbol.
      # Each value is a Hash with :handler (a handler instance) and :args
      # (names of other determinations it depends on).
      def determinations
        @determinations ||= {}
      end

      # DSL-esque call for adding determinations.
      #
      # determ  - the name, e.g. :business_name. Stored as a Symbol because
      #           #determine looks names up with to_sym; storing a String as
      #           given would make the entry impossible to find.
      # handler - a class whose instances respond to determine(page, *args);
      #           it is instantiated once, here.
      # args    - names of other determinations whose results are passed to
      #           the handler ahead of any call-time arguments.
      def determination(determ, handler, *args)
        determinations[determ.to_sym] = { :handler => handler.new, :args => args }
      end
    end

    # text  - a URL, or a bare host/path which gets "http://" prepended.
    # cache - optional value handed straight to WebPage.new.
    #
    # If neither form of +text+ matches URI.regexp no page is created and
    # @page stays nil.
    def initialize(text, cache=nil)
      target = [text, "http://#{text}"].find { |candidate| candidate =~ URI.regexp }
      @page = WebPage.new(target, cache) if target
    end

    # Pass the web page to the handler and have it get to work.
    #
    # determ - a registered determination name (String or Symbol), or :all to
    #          run every registered determination and return a Hash of
    #          name => result. Extra +args+ are not forwarded in the :all case.
    # args   - extra arguments appended after the results of the
    #          determination's declared dependencies.
    #
    # Raises RuntimeError when +determ+ is not registered.
    def determine(determ, *args)
      name = determ.to_sym

      if name == :all
        return self.class.determinations.keys.each_with_object({}) do |key, data|
          data[key] = determine(key)
        end
      end

      entry = self.class.determinations[name]
      raise "Determination #{name} not found" if entry.nil?

      # Resolve declared dependencies first; their results lead the argument list.
      resolved = entry[:args].map { |arg| determine(arg) }
      entry[:handler].determine(@page, *(resolved + args))
    end
  end
end
@@ -0,0 +1,3 @@
1
module Determine
  # Version of the gem. Frozen so the shared constant cannot be mutated in
  # place.
  VERSION = '0.1.1'.freeze
end
@@ -0,0 +1,30 @@
1
module Determine
  # A web page identified by URI, with lazily fetched and parsed source.
  class WebPage
    attr_accessor :uri

    # uri   - the page address; "http://" is prepended when it has no scheme.
    # cache - optional raw source; when given it is returned by #raw_source
    #         as-is and nothing is fetched.
    def initialize(uri, cache=nil)
      # Parse the uri ensuring it has a protocol specified
      @uri = URI(uri)
      @uri = URI("http://#{uri}") if @uri.scheme.nil?
      @raw_source = cache
    end

    # Alias for uri.host. Memoized, so it is not refreshed if +uri+ is
    # reassigned later.
    def domain
      @domain ||= uri.host
    end

    # Get the html parsed with nokogiri
    def source
      @source ||= Nokogiri::HTML(raw_source, nil, 'UTF-8')
    end

    # The page's raw source. Returns the cached value when one was supplied;
    # otherwise reads it from the uri (30 second read timeout) and strips any
    # bytes that are not valid UTF-8.
    # NOTE(review): relies on the uri object responding to #read, which
    # presumably comes from open-uri being loaded by the library entry point.
    def raw_source
      @raw_source ||= begin
        params = {:read_timeout => 30}
        strip_invalid_utf8(@uri.read(params))
      end
    end

    private

    # Returns a copy of +text+ tagged as UTF-8 with invalid byte sequences
    # removed. Uses only core String methods, so it does not depend on Iconv
    # (which is not part of the standard library on current Rubies).
    def strip_invalid_utf8(text)
      utf8 = text.dup.force_encoding('UTF-8')
      return utf8 if utf8.valid_encoding?

      if utf8.respond_to?(:scrub)
        utf8.scrub('')
      else
        # Rubies without String#scrub: a round trip through UTF-16 drops the
        # invalid sequences.
        utf8.encode('UTF-16BE', :invalid => :replace, :replace => '').encode('UTF-8')
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: determine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tyler Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Allows you to build classes that brute force determinations from text
31
+ email: tylersmith.me@gmail.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - Gemfile
37
+ - Gemfile.lock
38
+ - README.md
39
+ - determine.gemspec
40
+ - lib/determine.rb
41
+ - lib/determine/determination.rb
42
+ - lib/determine/determiner.rb
43
+ - lib/determine/version.rb
44
+ - lib/determine/web_page.rb
45
+ homepage: https://github.com/tyler-smith/determine
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.24
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Provides tools for creating heuristic filters for text
69
+ test_files: []