houdinirx 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
# Fetch gems over TLS; the plain-http endpoint exposes installs to tampering.
source "https://rubygems.org"

# Specify your gem's dependencies in houdinirx.gemspec
gemspec
data/README ADDED
@@ -0,0 +1,39 @@
1
+ A Ruby DSL for building the long, complex regular expressions that are used frequently across large data extraction projects.
2
+
3
+ This is the simplest possible example, but given the following text:
4
+ t = "$10.99 (555) 444-3322 Boston, MA"
5
+
6
+ Instead of writing a regular expression like the following:
7
+ data = t.match(/\$(\d+[,\d]*\.\d+)\s*(\(\d{3}\)\s\d{3}-\d{4})\s*(\w+),\s(\w+)/)
8
+
9
+ and then retrieving the data with match result indices:
10
+ data[1] => "10.99"
11
+ data[2] => "(555) 444-3322"
12
+ data[3] => "Boston"
13
+ data[4] => "MA"
14
+
15
+ We can use Houdini to build the regular expression from easy-to-read, reusable pieces.
16
+
17
+ Define a regular expression for use across your project:
18
+ Houdini.define(:word, /\w+/)
19
+
20
+ Call hmatch or hscan on a string and, inside the block, declare each named piece with the r() method, passing either an inline expression or the name of a pre-defined one. Then describe the overall pattern with the m() method, referring to each piece by its name in parentheses.
21
+
22
+ data = t.hmatch do
23
+ r("amount", /\d+[,\d]*\.\d+/)
24
+ r("phone_number", /\(\d{3}\)\s+\d{3}-\d{4}/)
25
+ r("city", :word)
26
+ r("state", :word)
27
+ m("\\$(amount) (phone_number) (city), (state)")
28
+ end
29
+
30
+ Now you can access the data as methods on the resulting object:
31
+
32
+ data.amount => "10.99"
33
+ data.phone_number => "(555) 444-3322"
34
+ data.city => "Boston"
35
+ data.state => "MA"
36
+
37
+ Or access the original MatchData object:
38
+
39
+ data.match => #<MatchData "$10.99 (555) 444-3322 Boston, MA" 1:"10.99" 2:"(555) 444-3322" 3:"Boston" 4:"MA">
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
data/houdinirx.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "houdinirx/version"
4
+
# Gem specification for houdinirx. The version comes from Houdini::VERSION
# (lib/houdinirx/version.rb), which is required before this block runs.
Gem::Specification.new do |s|
  s.name        = "houdinirx"
  s.version     = Houdini::VERSION
  s.authors     = ["Eric Campbell", "Duncan Mak"]
  s.email       = ["ericcampbell59@gmail.com", "duncanmak@gmail.com"]
  # TODO: set the project homepage; it is currently left empty.
  s.homepage    = ""
  s.summary     = %q{A Ruby DSL for building regular expressions}
  s.description = %q{A ruby DSL for building long complex regular expressions used frequently across large data extraction projects}

  # NOTE: the former `s.rubyforge_project = "houdinirx"` assignment was
  # dropped; the attribute is deprecated in RubyGems with no replacement.

  # Package contents are whatever git tracks, so the gem must be built from
  # inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,27 @@
module Houdini
  # Wraps the outcome of a match and exposes every named capture as a reader
  # method, e.g. +result.amount+.
  #
  # +match+ is either a MatchData (from String#match) or an Array of captured
  # values (one element of a String#scan result). +captures+ holds the capture
  # names as symbols; a name's position in that list selects the captured
  # value at the same position.
  class MatchResult

    attr_accessor :match, :captures

    # match    - MatchData or Array of captured values; nil means the regular
    #            expression did not match.
    # captures - capture names (each must respond to to_sym), in group order.
    #
    # Raises RuntimeError ("Regex failed") when match is nil.
    def initialize(match, captures)
      raise RuntimeError, "Regex failed" if match.nil?

      @match = match
      @captures = captures.collect { |c| c.to_sym }
    end

    # Returns the captured value for a method named after a capture.
    #
    # Raises RuntimeError ("<name> not captured") for any other unknown
    # method; the exception class is kept for backward compatibility.
    def method_missing(method, *args, &block)
      index = @captures.index(method)
      raise RuntimeError, "#{method} not captured" if index.nil?

      values = @match.is_a?(MatchData) ? @match.captures : @match
      values[index]
    end

    # Keeps respond_to? truthful about the dynamic capture readers. It also
    # stops Ruby's implicit conversion probes (to_ary, to_str, ...) from being
    # routed into method_missing, where they used to raise RuntimeError.
    def respond_to_missing?(method, include_private = false)
      @captures.include?(method) || super
    end

  end
end
@@ -0,0 +1,54 @@
module Houdini
  # Builder behind String#hmatch / String#hscan. Named sub-expressions are
  # registered with #r, the overall template is set with #m, and #match / #scan
  # assemble both into a single Regexp and wrap the outcome in
  # Houdini::MatchResult objects.
  #
  # NOTE: capture names are tracked positionally (one per "(name)" in the
  # template), so a sub-expression that contains its own capturing group
  # shifts the positions of the groups after it; use (?:...) inside
  # sub-expressions.
  class RegexHelper
    attr_accessor :expressions, :match_string, :captures, :regexp

    def initialize
      @expressions = []
      @captures = []
      @regexp = nil
    end

    # Registers one or more names for an expression.
    #
    #   r("city", /\w+/)            # one name
    #   r("city", "state", :word)   # several names sharing one expression
    #
    # The last argument is the expression: a Regexp, a Symbol naming an
    # expression registered through Houdini.define, or a String of regexp
    # source. All preceding arguments are names.
    def r(*args)
      expression = args.last
      args[0...-1].each { |name| @expressions << [name, expression] }
    end

    # Sets the match template. Each whitespace character in it later becomes
    # "\s*", and every registered name is replaced by its expression source.
    def m(match_str)
      @match_string = match_str
    end

    # Matches text once against the assembled expression.
    #
    # Returns a Houdini::MatchResult (which raises if nothing matched).
    def match(text, options=Regexp::IGNORECASE)
      Houdini::MatchResult.new(text.match(build_regexp(options)), @captures)
    end

    # Scans text for every occurrence of the assembled expression.
    #
    # Returns an Array of Houdini::MatchResult, one per occurrence.
    def scan(text, options=Regexp::IGNORECASE)
      text.scan(build_regexp(options)).collect do |groups|
        Houdini::MatchResult.new(groups, @captures)
      end
    end

    protected

    # Expands the template into @regexp and returns the compiled Regexp.
    # Captures are rebuilt from scratch so repeated match/scan calls on the
    # same helper do not accumulate duplicate names.
    def build_regexp(options)
      raise ArgumentError, "no match string given; call m(...) first" if match_string.nil?

      @captures = []
      @regexp = match_string.gsub(/\s/, "\\s*")
      replace_expressions!(@regexp)
      Regexp.new(@regexp, options)
    end

    # Substitutes every registered name in +regex+ (modified in place) with the
    # source of its expression and appends to @captures the names that occur
    # as "(name)", in the order those groups appear in the template.
    #
    # All names are replaced in a single pass, longest name first, so text
    # inserted for one name is never rewritten by another. The block form of
    # gsub! inserts the source verbatim (no backslash interpretation).
    def replace_expressions!(regex)
      sources = {}
      @expressions.each do |name, expression|
        key = name.to_s
        next if key.empty? || sources.key?(key) # first definition of a name wins
        sources[key] = expression_source(expression)
      end
      return regex if sources.empty?

      token = Regexp.union(sources.keys.sort_by { |key| -key.length })
      @captures.concat(regex.scan(/\((#{token.source})\)/).flatten)
      regex.gsub!(token) { |key| sources[key] }
      regex
    end

    # Resolves an expression given to #r into regexp source text.
    #
    # Raises ArgumentError when a Symbol has no definition in Houdini.
    def expression_source(expression)
      case expression
      when Symbol
        definition = Houdini[expression]
        raise ArgumentError, "no expression defined for #{expression.inspect}" if definition.nil?
        definition.respond_to?(:source) ? definition.source : definition.to_s
      when Regexp
        expression.source
      else
        expression.to_s
      end
    end
  end
end
@@ -0,0 +1,13 @@
# Houdini entry points added to String.
class String
  # Builds an expression inside the block (evaluated in the context of a
  # Houdini::RegexHelper, so r(...) and m(...) are available) and matches this
  # string against it once.
  #
  # options - Regexp option flags forwarded to the helper; when nil the
  #           helper's own default is used.
  #
  # Returns whatever Houdini::RegexHelper#match returns.
  # Raises ArgumentError when no block is given.
  def hmatch(options=nil, &block)
    raise ArgumentError, "hmatch requires a block" if block.nil?

    regex_helper = Houdini::RegexHelper.new
    regex_helper.instance_eval(&block)
    # options used to be accepted but silently dropped; forward it when given.
    options.nil? ? regex_helper.match(self) : regex_helper.match(self, options)
  end

  # Same as #hmatch, but scans this string for every occurrence.
  #
  # Returns whatever Houdini::RegexHelper#scan returns.
  # Raises ArgumentError when no block is given.
  def hscan(options=nil, &block)
    raise ArgumentError, "hscan requires a block" if block.nil?

    regex_helper = Houdini::RegexHelper.new
    regex_helper.instance_eval(&block)
    options.nil? ? regex_helper.scan(self) : regex_helper.scan(self, options)
  end
end
@@ -0,0 +1,3 @@
module Houdini
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/houdinirx.rb ADDED
@@ -0,0 +1,25 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+
4
+ require "houdinirx/version"
5
+ require "houdinirx/string"
6
+ require "houdinirx/regex_helper"
7
+ require "houdinirx/match_result"
8
+
# Registry of reusable, named expressions shared across a project.
module Houdini

  class << self
    # Returns the expression registered under +name+, or nil when none is.
    def [](name)
      definitions[name]
    end

    # Registers +regex+ under +name+, replacing any earlier definition.
    # Returns +regex+.
    def define(name, regex)
      definitions[name] = regex
    end

    # The Hash of all registered expressions, created on first use.
    #
    # Stored in an instance variable of the Houdini module itself rather than
    # a class variable, so classes that include Houdini cannot share or
    # shadow the storage.
    def definitions
      @definitions ||= {}
    end
  end

end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: houdinirx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Eric Campbell
14
+ - Duncan Mak
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2011-08-01 00:00:00 Z
20
+ dependencies: []
21
+
22
+ description: A ruby DSL for building long complex regular expressions used frequently across large data extraction projects
23
+ email:
24
+ - ericcampbell59@gmail.com
25
+ - duncanmak@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - README
36
+ - Rakefile
37
+ - houdinirx.gemspec
38
+ - lib/houdinirx.rb
39
+ - lib/houdinirx/match_result.rb
40
+ - lib/houdinirx/regex_helper.rb
41
+ - lib/houdinirx/string.rb
42
+ - lib/houdinirx/version.rb
43
+ homepage: ""
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options: []
48
+
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project: houdinirx
72
+ rubygems_version: 1.8.5
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: A Ruby DSL for building regular expressions
76
+ test_files: []
77
+