houdinirx 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in houdinirx.gemspec
gemspec
data/README ADDED
@@ -0,0 +1,39 @@
1
+ A Ruby DSL for building long, complex regular expressions that are reused frequently across large data-extraction projects.
2
+
3
+ This is the simplest possible example, but given the following text:
4
+ t = "$10.99 (555) 444-3322 Boston, MA"
5
+
6
+ Instead of writing a regular expression like the following:
7
+ data = t.match(/\$(\d+[,\d]*\.\d+)\s*(\(\d{3}\)\s\d{3}-\d{4})\s*(\w+),\s(\w+)/)
8
+
9
+ and then retrieving the data with match result indices:
10
+ data[1] => "10.99"
11
+ data[2] => "(555) 444-3322"
12
+ data[3] => "Boston"
13
+ data[4] => "MA"
14
+
15
+ We can use Houdini to build the regular expression in easy-to-read, reusable pieces.
16
+
17
+ Define a regular expression for use across your project:
18
+ Houdini.define(:word, /\w+/)
19
+
20
+ Call hmatch or hscan on a string and, inside the block, declare each named piece with the r() method, either defining its expression inline or referring to a pre-defined expression by name. Then describe the overall pattern with the m() method, wrapping each piece you want to capture in parentheses.
21
+
22
+ data = t.hmatch do
23
+ r("amount", /\d+[,\d]*\.\d+/)
24
+ r("phone_number", /\(\d{3}\)\s+\d{3}-\d{4}/)
25
+ r("city", :word)
26
+ r("state", :word)
27
+ m("\\$(amount) (phone_number) (city), (state)")
28
+ end
29
+
30
+ Now you can access the data as methods on the resulting object:
31
+
32
+ data.amount => "10.99"
33
+ data.phone_number => "(555) 444-3322"
34
+ data.city => "Boston"
35
+ data.state => "MA"
36
+
37
+ or access the original MatchData object
38
+
39
+ data.match => #<MatchData "$10.99 (555) 444-3322 Boston, MA" 1:"10.99" 2:"(555) 444-3322" 3:"Boston" 4:"MA">
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
data/houdinirx.gemspec ADDED
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-
# Put this checkout's lib/ at the FRONT of the load path so the version file
# required below is the local one, not a copy found in an earlier entry.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "houdinirx/version"

# Gem specification for houdinirx.
Gem::Specification.new do |s|
  s.name        = "houdinirx"
  s.version     = Houdini::VERSION
  s.authors     = ["Eric Campbell", "Duncan Mak"]
  s.email       = ["ericcampbell59@gmail.com", "duncanmak@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{A Ruby DSL for building regular expressions}
  s.description = %q{A ruby DSL for building long complex regular expressions used frequently across large data extraction projects}

  # NOTE: the former `s.rubyforge_project` assignment was dropped; RubyForge
  # no longer exists and RubyGems has deprecated the attribute.

  # The file lists are taken from git, so the gem must be built from a git
  # checkout with the `git` executable available.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,27 @@
1
module Houdini
  # Wraps the outcome of a match and exposes every named capture as a reader
  # method, e.g. +result.city+.
  #
  # +match+ is either a MatchData or an Array of captured strings (anything
  # that is not a MatchData is indexed directly with +[]+). +captures+ holds
  # the capture names, as symbols, in the same order as the captured groups.
  class MatchResult

    attr_accessor :match, :captures

    # match    - MatchData or Array of captured values; nil means "no match".
    # captures - capture names (each must respond to +to_sym+), in group order.
    #
    # Raises RuntimeError ("Regex failed") when +match+ is nil.
    def initialize(match, captures)
      raise RuntimeError, "Regex failed" if match.nil?

      @match = match
      @captures = captures.map(&:to_sym)
    end

    # Resolves +result.<capture_name>+ to the captured value at that name's
    # position.
    #
    # Raises RuntimeError ("<name> not captured") for any other message; this
    # exception type is kept for callers that already rescue it.
    def method_missing(method, *args, &block)
      index = @captures.index(method)
      raise RuntimeError, "#{method} not captured" if index.nil?

      groups = @match.is_a?(MatchData) ? @match.captures : @match
      groups[index]
    end

    # Keeps +respond_to?+ truthful about the dynamic readers. It also lets
    # Ruby's implicit-conversion probes (e.g. +to_ary+ from Array#flatten or
    # Kernel#puts) see that this object does not convert, instead of reaching
    # #method_missing and hitting its RuntimeError.
    def respond_to_missing?(method, include_private = false)
      @captures.include?(method) || super
    end

  end
end
@@ -0,0 +1,54 @@
1
module Houdini
  # Collects named expression pieces (#r) and a match template (#m), then
  # expands the template into a regular expression and applies it to text.
  class RegexHelper
    attr_accessor :expressions, :match_string, :captures, :regexp

    def initialize
      @expressions = []
      @captures = []
      @regexp = nil
    end

    # Registers one or more names for an expression: r(name, ..., expression).
    # The last argument is the expression (a Regexp, a String of regex source,
    # or a Symbol looked up through Houdini[]); every preceding argument is a
    # name bound to it. With fewer than two arguments nothing is registered.
    def r(*args)
      *names, expression = args
      names.each { |name| @expressions << [name, expression] }
    end

    # Sets the match template. Registered names inside it are replaced by
    # their expression; a name wrapped in parentheses becomes a capture.
    def m(match_str)
      @match_string = match_str
    end

    # Applies the expanded template to +text+ with String#match.
    #
    # text    - String to search.
    # options - Regexp option flags (default: Regexp::IGNORECASE).
    #
    # Returns a Houdini::MatchResult (whose constructor raises when nothing
    # matched).
    def match(text, options=Regexp::IGNORECASE)
      Houdini::MatchResult.new(text.match(build_regexp(options)), @captures)
    end

    # Applies the expanded template to +text+ with String#scan.
    #
    # Returns an Array with one Houdini::MatchResult per scan hit.
    def scan(text, options=Regexp::IGNORECASE)
      pattern = build_regexp(options)
      text.scan(pattern).map { |groups| Houdini::MatchResult.new(groups, @captures) }
    end

    protected

    # Expands the template into a Regexp. Every whitespace character of the
    # template becomes "\s*" before the names are substituted, so whitespace
    # inside the substituted expressions is left untouched. The expanded
    # source is kept in @regexp.
    def build_regexp(options)
      raise ArgumentError, "no match string given; call m(...) first" if match_string.nil?

      @regexp = match_string.gsub(/\s/, "\\s*")
      replace_expressions!(@regexp)
      Regexp.new(@regexp, options)
    end

    # Substitutes, in place, every registered name in +regex+ with its
    # expression source and rebuilds @captures.
    #
    # @captures is reset on every call (so reusing the helper does not pile up
    # duplicates) and is ordered by where each "(name)" appears in the
    # template, which is the order of the resulting groups.
    #
    # Known limitation: names are substituted one after another as plain
    # substrings, so a name that is contained in another name, or that occurs
    # inside an already substituted expression, is replaced there as well.
    def replace_expressions!(regex)
      @captures = capture_order(regex)
      @expressions.each do |name, expression|
        source = expression_source(name, expression)
        key = name.is_a?(Symbol) ? name.to_s : name
        # Block form: the replacement is inserted verbatim. A replacement
        # *string* would have its backslash sequences ("\\", "\1") interpreted.
        regex.gsub!(key) { source }
      end
    end

    # Returns the capture names ordered by the position of their "(name)"
    # occurrence in +template+.
    def capture_order(template)
      hits = []
      @expressions.map(&:first).uniq.each do |name|
        template.scan(/\(#{Regexp.escape(name.to_s)}\)/) do
          hits << [Regexp.last_match.begin(0), name]
        end
      end
      hits.sort_by(&:first).map(&:last)
    end

    # Resolves an expression to regex source text. Symbols are looked up with
    # Houdini[]; a Regexp contributes only its source (its own option flags
    # are not carried over); anything else is used as given.
    #
    # Raises ArgumentError when a Symbol has no definition.
    def expression_source(name, expression)
      if expression.is_a?(Symbol)
        definition = Houdini[expression]
        if definition.nil?
          raise ArgumentError, "no Houdini definition named #{expression.inspect} (used by #{name.inspect})"
        end
        expression = definition
      end
      expression.is_a?(Regexp) ? expression.source : expression
    end
  end
end
@@ -0,0 +1,13 @@
1
# Houdini entry points added to String.
class String
  # Builds a regular expression with the Houdini DSL and matches it against
  # this string.
  #
  # options - optional Regexp option flags; when nil the helper's own default
  #           is used.
  # block   - evaluated in the context of a fresh Houdini::RegexHelper, so the
  #           helper's methods can be called without a receiver.
  #
  # Returns whatever Houdini::RegexHelper#match returns.
  # Raises ArgumentError when no block is given.
  def hmatch(options=nil, &block)
    raise ArgumentError, "hmatch requires a block" unless block

    helper = Houdini::RegexHelper.new
    helper.instance_eval(&block)
    # Only forward options that were actually supplied, so the helper's
    # default stays in charge otherwise.
    options.nil? ? helper.match(self) : helper.match(self, options)
  end

  # Same as #hmatch, but delegates to Houdini::RegexHelper#scan.
  #
  # Returns whatever Houdini::RegexHelper#scan returns.
  # Raises ArgumentError when no block is given.
  def hscan(options=nil, &block)
    raise ArgumentError, "hscan requires a block" unless block

    helper = Houdini::RegexHelper.new
    helper.instance_eval(&block)
    options.nil? ? helper.scan(self) : helper.scan(self, options)
  end
end
@@ -0,0 +1,3 @@
1
module Houdini
  # Version string of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/houdinirx.rb ADDED
@@ -0,0 +1,25 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+
4
+ require "houdinirx/version"
5
+ require "houdinirx/string"
6
+ require "houdinirx/regex_helper"
7
+ require "houdinirx/match_result"
8
+
9
# Registry of named, reusable expressions.
module Houdini

  class << self
    # Returns the expression registered under +name+, or nil when none is.
    def [](name)
      definitions[name]
    end

    # Registers +regex+ under +name+, replacing any previous definition.
    # Returns +regex+.
    def define(name, regex)
      definitions[name] = regex
    end

    # The Hash holding all registered definitions, created on first use.
    # It lives in an instance variable of the module itself rather than in a
    # class variable.
    def definitions
      @definitions ||= {}
    end
  end

end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: houdinirx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Eric Campbell
14
+ - Duncan Mak
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2011-08-01 00:00:00 Z
20
+ dependencies: []
21
+
22
+ description: A ruby DSL for building long complex regular expressions used frequently across large data extraction projects
23
+ email:
24
+ - ericcampbell59@gmail.com
25
+ - duncanmak@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - README
36
+ - Rakefile
37
+ - houdinirx.gemspec
38
+ - lib/houdinirx.rb
39
+ - lib/houdinirx/match_result.rb
40
+ - lib/houdinirx/regex_helper.rb
41
+ - lib/houdinirx/string.rb
42
+ - lib/houdinirx/version.rb
43
+ homepage: ""
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options: []
48
+
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project: houdinirx
72
+ rubygems_version: 1.8.5
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: A Ruby DSL for building regular expressions
76
+ test_files: []
77
+