determine 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/Gemfile.lock +11 -0
- data/README.md +1 -0
- data/determine.gemspec +17 -0
- data/lib/determine.rb +11 -0
- data/lib/determine/determination.rb +19 -0
- data/lib/determine/determiner.rb +49 -0
- data/lib/determine/version.rb +3 -0
- data/lib/determine/web_page.rb +30 -0
- metadata +69 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
WIP - v0.1.0
|
data/determine.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Put the gem's lib/ directory (located relative to this file) at the front of
# the load path so the version constant can be required below.
$LOAD_PATH.unshift(File.join(__FILE__, '..', 'lib'))
require 'determine/version'

# Gem packaging metadata for `determine`.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'determine'
  spec.version     = ::Determine::VERSION
  spec.authors     = ['Tyler Smith']
  spec.email       = 'tylersmith.me@gmail.com'
  spec.homepage    = 'https://github.com/tyler-smith/determine'

  # Human-readable blurbs
  spec.summary     = 'Provides tools for creating heuristic filters for text'
  spec.description = 'Allows you to build classes that brute force determinations from text'

  # Package every file tracked by git; code is loaded from lib/.
  spec.files         = `git ls-files`.split("\n")
  spec.require_paths = ['lib']

  # Runtime dependency (no version constraint given).
  spec.add_runtime_dependency('nokogiri')
end
|
data/lib/determine.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Create our root module so we don't need to worry about it later, add ourselves
# to the load path, and get to work
module Determine
  # Empty namespace; presumably intended to hold the library's exception
  # classes (none are defined here).
  module Exceptions;end
end

# Make sibling files requirable by their path relative to this directory.
$:.unshift File.dirname(__FILE__)

# Require every Ruby file under determine/ (recursively). The list is sorted
# explicitly because Dir[] only guarantees sorted results from Ruby 3.0 on;
# on older rubies the order is platform-dependent, which would make the load
# order vary between machines.
Dir[File.join(File.dirname(__FILE__), 'determine', '**','*.rb')].sort.each { |f| require f }
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Determine
  # Base class for a single kind of determination. It walks an ordered list of
  # handler method names and returns the first truthy answer any of them gives.
  class Determination
    # Ordered list of handler method names to try.
    #
    # @return [Array<String>] defaults to a single 'default' handler
    def handlers
      @handlers ||= ['default']
    end

    # Tries each handler in order until one produces a determination.
    # TODO: in future extend this to allow multiple determinations
    #
    # @param page the object handed to every handler as its first argument
    # @param args extra arguments forwarded to every handler
    # @return the first truthy handler result, or nil when no handler succeeds
    def determine(page, *args)
      handlers.each do |name|
        # Handlers this object does not respond to are skipped.
        next unless respond_to?(name)

        outcome = send(name, page, *args)
        return outcome if outcome
      end

      nil
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Determine
  # Runs named determinations against one web page.
  #
  # Determinations are registered on the class with +determination+; an
  # instance wraps a page built from the given text and resolves
  # determinations by name through +determine+.
  class Determiner
    class << self
      # A list of the possible determinations we can make.
      #
      # Kept in an instance variable of the class object itself.
      #
      # @return [Hash] name (Symbol) => {:handler => object, :args => Array}
      def determinations
        @determinations ||= {}
      end

      # DSL-esque call for adding determinations.
      #
      # @param determ [#to_sym] the name e.g. :business_name
      # @param handler [Class] instantiated here with no arguments; the
      #   instance must respond to determine(page, *args)
      # @param args names of other determinations whose results are passed
      #   to the handler ahead of any call-time arguments
      def determination(determ, handler, *args)
        # Store under a Symbol: #determine always looks names up with
        # determ.to_sym, so a String key could never be found.
        determinations[determ.to_sym] = {:handler => handler.new, :args => args}
      end
    end

    # @param text a URI, or a scheme-less address which is retried with
    #   "http://" prepended
    # @param cache passed straight through to WebPage.new
    #
    # When neither form matches URI.regexp no page is created and @page
    # stays nil.
    def initialize(text, cache=nil)
      candidate = [text, "http://#{text}"].find { |uri| uri =~ URI.regexp }
      @page = WebPage.new(candidate, cache) if candidate
    end

    # Pass the webpage to the handler and have it get to work.
    #
    # @param determ [#to_sym] name of a registered determination, or :all to
    #   run every registered determination
    # @param args extra arguments appended after the resolved dependency
    #   results (not forwarded when determ is :all)
    # @return the handler's result, or for :all a Hash of name => result
    # @raise [RuntimeError] when no determination is registered under the name
    def determine(determ, *args)
      name = determ.to_sym

      if name == :all
        return self.class.determinations.keys.each_with_object({}) do |key, data|
          data[key] = determine(key)
        end
      end

      determiner = self.class.determinations[name]
      raise "Determination #{name} not found" if determiner.nil?

      # Dependencies declared at registration are resolved first and passed
      # ahead of the caller's own arguments.
      resolved = determiner[:args].map { |arg| determine(arg) } + args
      determiner[:handler].determine(@page, *resolved)
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Determine
  # A web page identified by a URI, with lazily fetched and parsed source.
  class WebPage
    attr_accessor :uri

    # @param uri [String] address of the page; "http://" is prepended when
    #   the parsed URI has no scheme
    # @param cache pre-fetched page source; when given it is used as-is and
    #   nothing is downloaded
    def initialize(uri, cache=nil)
      # Parse the uri ensuring it has a protocol specified
      @uri = URI(uri)
      @uri = URI("http://#{uri}") if @uri.scheme.nil?
      @raw_source = cache
    end

    # Alias for uri.host (memoized on first call).
    def domain
      @domain ||= uri.host
    end

    # Get the html parsed with nokogiri (memoized).
    def source
      @source ||= Nokogiri::HTML(raw_source, nil, 'UTF-8')
    end

    # The page source as a String, fetched on first use unless a cache was
    # supplied to the constructor.
    #
    # @return [String]
    def raw_source
      @raw_source ||= begin
        params = {:read_timeout => 30}
        # Force the string to only use correct UTF-8 characters: treat the
        # bytes as UTF-8 and drop any invalid sequences. This replaces the
        # former Iconv 'UTF-8//IGNORE' conversion; Iconv was never required
        # here and is no longer part of the standard library.
        @uri.read(params).dup.force_encoding('UTF-8').scrub('')
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: determine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Tyler Smith
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-14 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Allows you to build classes that brute force determinations from text
|
31
|
+
email: tylersmith.me@gmail.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- Gemfile
|
37
|
+
- Gemfile.lock
|
38
|
+
- README.md
|
39
|
+
- determine.gemspec
|
40
|
+
- lib/determine.rb
|
41
|
+
- lib/determine/determination.rb
|
42
|
+
- lib/determine/determiner.rb
|
43
|
+
- lib/determine/version.rb
|
44
|
+
- lib/determine/web_page.rb
|
45
|
+
homepage: https://github.com/tyler-smith/determine
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.8.24
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Provides tools for creating heuristic filters for text
|
69
|
+
test_files: []
|