determine 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/Gemfile.lock +11 -0
- data/README.md +1 -0
- data/determine.gemspec +17 -0
- data/lib/determine.rb +11 -0
- data/lib/determine/determination.rb +19 -0
- data/lib/determine/determiner.rb +49 -0
- data/lib/determine/version.rb +3 -0
- data/lib/determine/web_page.rb +30 -0
- metadata +69 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
WIP - v0.1.0
|
data/determine.gemspec
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Gem specification for the determine gem.

# Put the gem's lib directory on the load path so the version constant can be
# required before the gem is installed. The path is expanded to an absolute
# directory (rather than a path that walks "through" this file) and is only
# added when it is not already present.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'determine/version'

Gem::Specification.new do |s|
  s.name        = 'determine'
  s.version     = ::Determine::VERSION
  s.authors     = ['Tyler Smith']
  s.homepage    = 'https://github.com/tyler-smith/determine'
  s.email       = 'tylersmith.me@gmail.com'
  s.summary     = 'Provides tools for creating heuristic filters for text'
  s.description = 'Allows you to build classes that brute force determinations from text'

  # Package every file tracked by git; building therefore requires a git
  # checkout with the git executable available.
  s.files         = `git ls-files`.split("\n")
  s.require_paths = ['lib']

  s.add_runtime_dependency 'nokogiri'
end
|
data/lib/determine.rb
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
require 'open-uri'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
|
|
4
|
+
# Root namespace for the gem. It is created here, before anything else is
# loaded, so the files under lib/determine can simply reopen it.
module Determine
  # Empty namespace; presumably intended to hold the gem's exception classes.
  module Exceptions; end
end

# Make this directory requirable, then load every file below lib/determine.
# The directory is expanded to an absolute path and added only once, and the
# glob result is sorted because Dir[] does not guarantee a stable order on
# every Ruby version and platform.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
Dir[File.join(lib_dir, 'determine', '**', '*.rb')].sort.each { |f| require f }
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Determine
  # Base class for one kind of determination. A determination owns an ordered
  # list of handler method names and tries them in turn against a page.
  class Determination
    # Ordered list of handler method names. Defaults to a single handler
    # called 'default'; the same array is returned on every call.
    def handlers
      return @handlers if @handlers

      @handlers = ['default']
    end

    # Tries each handler on the page until one produces a determination.
    #
    # page - the object handed to every handler as its first argument
    # args - extra arguments forwarded unchanged to every handler
    #
    # Handlers this object does not respond to are skipped. Returns the first
    # truthy handler result, or nil when no handler produced one.
    # TODO: in future extend this to allow multiple determinations
    def determine(page, *args)
      handlers.each do |name|
        next unless respond_to?(name)

        outcome = send(name, page, *args)
        return outcome if outcome
      end

      nil
    end
  end
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
module Determine
  # Runs the determinations registered on the class against a single web page.
  class Determiner
    class << self
      # Registry of the determinations declared on this class, keyed by name.
      def determinations
        @determinations ||= {}
      end

      # DSL-style registration of a determination.
      #
      # determ  - the name, e.g. :business_name
      # handler - a class (a Determination subclass) that does the work; it is
      #           instantiated once, at registration time
      # args    - names of other determinations whose results are passed to
      #           the handler ahead of any caller-supplied arguments
      def determination(determ, handler, *args)
        determinations[determ] = {:handler => handler.new, :args => args}
      end
    end

    # text  - a URL; when it does not look like a URI it is retried with an
    #         "http://" prefix
    # cache - optional pre-fetched page source handed to WebPage
    #
    # No page is created when neither form looks like a URI.
    def initialize(text, cache=nil)
      unless text =~ URI.regexp
        text = "http://#{text}"
        return unless text =~ URI.regexp
      end

      @page = WebPage.new(text, cache)
    end

    # Passes the web page to the handler and has it get to work.
    #
    # determ - name of a registered determination, or :all to run every
    #          registered determination and collect the results in a hash
    #          keyed by name
    # args   - extra arguments appended after the results of the
    #          determination's declared dependencies
    #
    # Raises a RuntimeError when the named determination is not registered.
    def determine(determ, *args)
      name = determ.to_sym

      if name == :all
        return self.class.determinations.keys.each_with_object({}) do |key, results|
          results[key] = determine(key)
        end
      end

      entry = self.class.determinations[name]
      raise "Determination #{name} not found" if entry.nil?

      # Resolve the declared dependencies first; their results lead the
      # argument list given to the handler.
      dependencies = entry[:args].map { |arg| determine(arg) }
      entry[:handler].determine(@page, *(dependencies + args))
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module Determine
  # A web page identified by a URI. The page source can be supplied up front
  # (cache) or is read lazily through the URI object on first use.
  class WebPage
    attr_accessor :uri

    # uri   - the page address; "http://" is prepended when it has no scheme
    # cache - optional raw page source; when given, nothing is fetched
    def initialize(uri, cache=nil)
      # Parse the uri ensuring it has a protocol specified
      @uri = URI(uri)
      @uri = URI("http://#{uri}") if @uri.scheme.nil?
      @raw_source = cache
    end

    # Alias for uri.host (memoized on first call).
    def domain
      @domain ||= uri.host
    end

    # The page's HTML parsed with Nokogiri (memoized).
    def source
      @source ||= Nokogiri::HTML(raw_source, nil, 'UTF-8')
    end

    # The raw page source as a String. A cached source given to the
    # constructor is returned as-is; otherwise the page is read from the URI
    # with a 30 second read timeout and stripped of invalid UTF-8 bytes.
    # (Reading relies on open-uri, which lib/determine.rb requires.)
    def raw_source
      @raw_source ||= begin
        params = {:read_timeout => 30}
        sanitize_utf8(@uri.read(params))
      end
    end

    private

    # Returns a copy of text tagged as UTF-8 with every invalid byte sequence
    # dropped. This replaces the former Iconv('UTF-8//IGNORE') call: Iconv was
    # never required by the gem and no longer ships with Ruby (2.0+).
    def sanitize_utf8(text)
      utf8 = text.dup.force_encoding('UTF-8')
      return utf8.scrub('') if utf8.respond_to?(:scrub)

      # Rubies without String#scrub: a round trip through UTF-16 discards the
      # invalid sequences.
      utf8.encode('UTF-16', :invalid => :replace, :undef => :replace, :replace => '').encode('UTF-8')
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: determine
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Tyler Smith
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2012-10-14 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: nokogiri
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
18
|
+
requirements:
|
|
19
|
+
- - ! '>='
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ! '>='
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
version: '0'
|
|
30
|
+
description: Allows you to build classes that brute force determinations from text
|
|
31
|
+
email: tylersmith.me@gmail.com
|
|
32
|
+
executables: []
|
|
33
|
+
extensions: []
|
|
34
|
+
extra_rdoc_files: []
|
|
35
|
+
files:
|
|
36
|
+
- Gemfile
|
|
37
|
+
- Gemfile.lock
|
|
38
|
+
- README.md
|
|
39
|
+
- determine.gemspec
|
|
40
|
+
- lib/determine.rb
|
|
41
|
+
- lib/determine/determination.rb
|
|
42
|
+
- lib/determine/determiner.rb
|
|
43
|
+
- lib/determine/version.rb
|
|
44
|
+
- lib/determine/web_page.rb
|
|
45
|
+
homepage: https://github.com/tyler-smith/determine
|
|
46
|
+
licenses: []
|
|
47
|
+
post_install_message:
|
|
48
|
+
rdoc_options: []
|
|
49
|
+
require_paths:
|
|
50
|
+
- lib
|
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
52
|
+
none: false
|
|
53
|
+
requirements:
|
|
54
|
+
- - ! '>='
|
|
55
|
+
- !ruby/object:Gem::Version
|
|
56
|
+
version: '0'
|
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
|
+
none: false
|
|
59
|
+
requirements:
|
|
60
|
+
- - ! '>='
|
|
61
|
+
- !ruby/object:Gem::Version
|
|
62
|
+
version: '0'
|
|
63
|
+
requirements: []
|
|
64
|
+
rubyforge_project:
|
|
65
|
+
rubygems_version: 1.8.24
|
|
66
|
+
signing_key:
|
|
67
|
+
specification_version: 3
|
|
68
|
+
summary: Provides tools for creating heuristic filters for text
|
|
69
|
+
test_files: []
|