determine 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org" # Primary gem source. NOTE(review): plain HTTP — consider the https endpoint
2
+ source "http://gems.github.com" # Second gem source, also plain HTTP. NOTE(review): confirm this host is still needed
3
+
4
+ gem 'nokogiri' # Only dependency declared here; no version constraint is given
data/Gemfile.lock ADDED
@@ -0,0 +1,11 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ remote: http://gems.github.com/
4
+ specs:
5
+ nokogiri (1.5.5)
6
+
7
+ PLATFORMS
8
+ ruby
9
+
10
+ DEPENDENCIES
11
+ nokogiri
data/README.md ADDED
@@ -0,0 +1 @@
1
+ WIP - v0.1.1
data/determine.gemspec ADDED
@@ -0,0 +1,17 @@
1
+ $:.unshift File.join(__FILE__, '..', 'lib') # Prepend "<this gemspec>/../lib" to the load path for the require below
2
+ require 'determine/version' # Supplies ::Determine::VERSION, used for s.version
3
+
4
+ Gem::Specification.new do |s| # Package description for the 'determine' gem
5
+ s.name = 'determine'
6
+ s.version = ::Determine::VERSION # Single source of truth for the version string
7
+ s.authors = ['Tyler Smith']
8
+ s.homepage = 'https://github.com/tyler-smith/determine'
9
+ s.email = 'tylersmith.me@gmail.com'
10
+ s.summary = 'Provides tools for creating heuristic filters for text'
11
+ s.description = 'Allows you to build classes that brute force determinations from text'
12
+
13
+ s.files = `git ls-files`.split("\n") # Shells out to git: the file list is whatever git reports, one path per line
14
+ s.require_paths = ['lib']
15
+
16
+ s.add_runtime_dependency 'nokogiri' # No version constraint is given for this dependency
17
+ end
data/lib/determine.rb ADDED
@@ -0,0 +1,11 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
+ # Define the root Determine namespace up front so the files required below can
5
+ # reopen it, then put this directory on the load path and load the library files
6
+ module Determine
7
+ module Exceptions;end # Empty namespace; nothing is defined inside it in this file
8
+ end
9
+
10
+ $:.unshift File.dirname(__FILE__) # Prepend this file's directory to the load path
11
+ Dir[File.join(File.dirname(__FILE__), 'determine', '**','*.rb')].each { |f| require f } # Eagerly require every .rb file under determine/, at any depth
data/lib/determine/determination.rb ADDED
@@ -0,0 +1,19 @@
1
+ module Determine
2
+ class Determination # Tries a list of named handler methods in order until one yields a result
3
+ # Ordered list of handler method names to try; memoized, defaulting to ['default']
4
+ def handlers
5
+ @handlers ||= ['default']
6
+ end
7
+
8
+ # Calls each handler with (page, *args) in order and returns the first truthy result
9
+ # TODO: in future extend this to allow multiple determinations
10
+ def determine(page, *args)
11
+ handlers.each do |handler|
12
+ result = self.send(handler, page, *args) if self.respond_to?(handler) # Handlers this object does not respond to are skipped
13
+ return result if result # Stop at the first handler that produced something
14
+ end
15
+
16
+ nil # No handler produced a truthy result
17
+ end
18
+ end
19
+ end
data/lib/determine/determiner.rb ADDED
@@ -0,0 +1,49 @@
1
+ module Determine
2
+ class Determiner # Class level: registry of named determinations. Instance level: runs them against one page
3
+ class << self
4
+ # Hash of the determinations registered on this class, keyed by name (memoized)
5
+ def determinations
6
+ @determinations ||= {}
7
+ end
8
+
9
+ # DSL-esque call for registering a determination
10
+ # determ is the name e.g. :business_name
11
+ # handler is a Determination subclass that does the work; it is instantiated once, here
12
+ def determination(determ, handler, *args) # args are names of other determinations whose results are fed to the handler
13
+ determinations[determ] = {:handler => handler.new, :args => args}
14
+ end
15
+ end
16
+
17
+ def initialize(text, cache=nil) # text is a URL, scheme optional; cache is handed to WebPage as its second argument
18
+ if text =~ URI.regexp # Unanchored match: text contains something URI-shaped as given
19
+ @page = WebPage.new(text, cache)
20
+ else
21
+ text_with_forced_scheme = "http://#{text}" # Otherwise retry with an http:// scheme prepended
22
+
23
+ if text_with_forced_scheme =~ URI.regexp # NOTE(review): when this fails too, @page is never assigned
24
+ text = text_with_forced_scheme
25
+ @page = WebPage.new(text, cache)
26
+ end
27
+ end
28
+ end
29
+
30
+ # Run the named determination against the page; :all runs every registered one and returns a Hash
31
+ def determine(determ, *args)
32
+ if determ.to_sym == :all
33
+ data = {}
34
+
35
+ self.class.determinations.keys.each do |key|
36
+ data[key] = self.determine(key) # Extra args are not forwarded in :all mode
37
+ end
38
+
39
+ return data
40
+ end
41
+
42
+ determiner = self.class.determinations[determ.to_sym]
43
+ raise "Determination #{determ.to_sym} not found" if determiner.nil? # Raises RuntimeError for an unregistered name
44
+
45
+ args = determiner[:args].map{|arg| self.determine(arg) } + args # Resolve dependency determinations first; their results precede the caller's args
46
+ return determiner[:handler].determine(@page, *args)
47
+ end
48
+ end
49
+ end
data/lib/determine/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Determine
2
+ VERSION = '0.1.1' # Gem version string; determine.gemspec reads it as ::Determine::VERSION
3
+ end
data/lib/determine/web_page.rb ADDED
@@ -0,0 +1,30 @@
1
+ module Determine
2
+ class WebPage # A page identified by URI; its source is fetched and parsed on first use
3
+ attr_accessor :uri # The URI object built in initialize
4
+
5
+ def initialize(uri, cache=nil) # cache, when given, is used as the raw source instead of fetching
6
+ # Parse the uri; if it has no scheme, re-parse it with an http:// prefix
7
+ @uri = URI(uri)
8
+ @uri = URI("http://#{uri}") if @uri.scheme.nil?
9
+ @raw_source = cache
10
+ end
11
+
12
+ # Alias for uri.host (memoized)
13
+ def domain
14
+ @domain ||= uri.host
15
+ end
16
+
17
+ # The raw source parsed as HTML by Nokogiri with UTF-8 encoding (memoized)
18
+ def source
19
+ @source ||= Nokogiri::HTML(raw_source, nil, 'UTF-8')
20
+ end
21
+
22
+ def raw_source # Page body: the cache passed to initialize, otherwise read from the URI on first call
23
+ # Read the page and pass it through Iconv (UTF-8 to UTF-8//IGNORE) so only valid UTF-8 remains
24
+ @raw_source ||= begin
25
+ params = {:read_timeout => 30} # Passed to @uri.read; presumably open-uri's read timeout in seconds — confirm
26
+ ::Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(@uri.read(params) + ' ')[0..-2] # Pads one space and trims it again, presumably an Iconv trailing-byte workaround. NOTE(review): Iconv is not required in the sources shown here
27
+ end
28
+ end
29
+ end
30
+ end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: determine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tyler Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Allows you to build classes that brute force determinations from text
31
+ email: tylersmith.me@gmail.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - Gemfile
37
+ - Gemfile.lock
38
+ - README.md
39
+ - determine.gemspec
40
+ - lib/determine.rb
41
+ - lib/determine/determination.rb
42
+ - lib/determine/determiner.rb
43
+ - lib/determine/version.rb
44
+ - lib/determine/web_page.rb
45
+ homepage: https://github.com/tyler-smith/determine
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.24
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: Provides tools for creating heuristic filters for text
69
+ test_files: []