darrell-activewarehouse-etl 0.9.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/CHANGELOG +198 -0
  2. data/LICENSE +7 -0
  3. data/README +99 -0
  4. data/Rakefile +175 -0
  5. data/TODO +28 -0
  6. data/bin/etl +28 -0
  7. data/bin/etl.cmd +8 -0
  8. data/examples/database.example.yml +16 -0
  9. data/lib/etl/batch/batch.rb +111 -0
  10. data/lib/etl/batch/directives.rb +55 -0
  11. data/lib/etl/batch.rb +2 -0
  12. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  13. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  14. data/lib/etl/builder.rb +2 -0
  15. data/lib/etl/commands/etl.rb +89 -0
  16. data/lib/etl/control/control.rb +405 -0
  17. data/lib/etl/control/destination/database_destination.rb +97 -0
  18. data/lib/etl/control/destination/file_destination.rb +126 -0
  19. data/lib/etl/control/destination.rb +448 -0
  20. data/lib/etl/control/source/database_source.rb +220 -0
  21. data/lib/etl/control/source/enumerable_source.rb +11 -0
  22. data/lib/etl/control/source/file_source.rb +90 -0
  23. data/lib/etl/control/source/model_source.rb +39 -0
  24. data/lib/etl/control/source.rb +109 -0
  25. data/lib/etl/control.rb +3 -0
  26. data/lib/etl/core_ext/time/calculations.rb +42 -0
  27. data/lib/etl/core_ext/time.rb +5 -0
  28. data/lib/etl/core_ext.rb +1 -0
  29. data/lib/etl/engine.rb +556 -0
  30. data/lib/etl/execution/base.rb +9 -0
  31. data/lib/etl/execution/batch.rb +8 -0
  32. data/lib/etl/execution/job.rb +8 -0
  33. data/lib/etl/execution/migration.rb +85 -0
  34. data/lib/etl/execution.rb +19 -0
  35. data/lib/etl/generator/generator.rb +20 -0
  36. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  37. data/lib/etl/generator.rb +2 -0
  38. data/lib/etl/http_tools.rb +139 -0
  39. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  40. data/lib/etl/parser/delimited_parser.rb +74 -0
  41. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  42. data/lib/etl/parser/parser.rb +41 -0
  43. data/lib/etl/parser/sax_parser.rb +218 -0
  44. data/lib/etl/parser/xml_parser.rb +65 -0
  45. data/lib/etl/parser.rb +11 -0
  46. data/lib/etl/processor/block_processor.rb +14 -0
  47. data/lib/etl/processor/bulk_import_processor.rb +83 -0
  48. data/lib/etl/processor/check_exist_processor.rb +80 -0
  49. data/lib/etl/processor/check_unique_processor.rb +35 -0
  50. data/lib/etl/processor/copy_field_processor.rb +26 -0
  51. data/lib/etl/processor/encode_processor.rb +55 -0
  52. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  53. data/lib/etl/processor/print_row_processor.rb +12 -0
  54. data/lib/etl/processor/processor.rb +25 -0
  55. data/lib/etl/processor/rename_processor.rb +24 -0
  56. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  57. data/lib/etl/processor/row_processor.rb +17 -0
  58. data/lib/etl/processor/sequence_processor.rb +23 -0
  59. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  60. data/lib/etl/processor/truncate_processor.rb +35 -0
  61. data/lib/etl/processor.rb +11 -0
  62. data/lib/etl/row.rb +20 -0
  63. data/lib/etl/screen/row_count_screen.rb +20 -0
  64. data/lib/etl/screen.rb +14 -0
  65. data/lib/etl/transform/block_transform.rb +13 -0
  66. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  67. data/lib/etl/transform/decode_transform.rb +51 -0
  68. data/lib/etl/transform/default_transform.rb +20 -0
  69. data/lib/etl/transform/foreign_key_lookup_transform.rb +169 -0
  70. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  71. data/lib/etl/transform/ordinalize_transform.rb +12 -0
  72. data/lib/etl/transform/sha1_transform.rb +13 -0
  73. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  74. data/lib/etl/transform/string_to_datetime_transform.rb +14 -0
  75. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  76. data/lib/etl/transform/transform.rb +61 -0
  77. data/lib/etl/transform/trim_transform.rb +26 -0
  78. data/lib/etl/transform/type_transform.rb +35 -0
  79. data/lib/etl/transform.rb +2 -0
  80. data/lib/etl/util.rb +59 -0
  81. data/lib/etl/version.rb +9 -0
  82. data/lib/etl.rb +83 -0
  83. metadata +245 -0
@@ -0,0 +1,20 @@
1
+ module ETL #:nodoc:
2
+ module Generator #:nodoc:
3
# Abstract parent of all generators. Concrete generators subclass this
# and override #next.
class Generator
  # Look up the generator class that corresponds to +name+.
  #
  # The name is converted to a string, camelized and suffixed with
  # "Generator"; the resulting constant is looked up in ETL::Generator.
  # For example :surrogate_key resolves to SurrogateKeyGenerator.
  def self.class_for_name(name)
    class_name = "#{name.to_s.camelize}Generator"
    ETL::Generator.const_get(class_name)
  end

  # Produce the next value. The base implementation always raises;
  # subclasses are expected to override it.
  def next
    raise "Must be implemented by a subclass"
  end
end
19
+ end
20
+ end
@@ -0,0 +1,39 @@
1
+ # This source file contains code for a basic sequential surrogate key generator
2
+
3
+ module ETL #:nodoc:
4
+ module Generator #:nodoc:
5
# Sequential surrogate key generator.
#
# The starting value is read once, at construction time, through
# ETL::Engine.connection(target); every call to #next then increments
# the in-memory counter.
class SurrogateKeyGenerator < Generator
  attr_reader :table, :target, :column, :query

  # Initialize the generator.
  #
  # Recognised options:
  # * <tt>:table</tt>: table whose max(column) seeds the counter
  # * <tt>:target</tt>: passed to ETL::Engine.connection
  # * <tt>:column</tt>: column used with :table (defaults to 'id')
  # * <tt>:query</tt>: used when :table is not given; its select_value
  #   result seeds the counter
  #
  # When neither :table nor :query is given, or the seed is blank, the
  # counter starts at 0.
  def initialize(options={})
    @table  = options[:table]
    @target = options[:target]
    @column = options[:column] || 'id'
    @query  = options[:query]

    seed = nil
    if table
      seed = ETL::Engine.connection(target).select_value("SELECT max(#{column}) FROM #{table_name}")
    elsif query
      seed = ETL::Engine.connection(target).select_value(query)
    end
    @surrogate_key = seed.blank? ? 0 : seed.to_i
  end

  # Increment the counter and return the new surrogate key.
  def next
    @surrogate_key = (@surrogate_key || 0) + 1
  end

  # Table name as resolved by ETL::Engine.table for the target connection.
  def table_name
    connection = ETL::Engine.connection(target)
    ETL::Engine.table(table, connection)
  end
end
38
+ end
39
+ end
@@ -0,0 +1,2 @@
1
+ require 'etl/generator/generator'
2
+ Dir[File.dirname(__FILE__) + "/generator/*.rb"].each { |file| require(file) }
@@ -0,0 +1,139 @@
1
+ require 'uri'
2
+
3
# Utility methods for working with HTTP request data (user agent strings
# and URIs). The methods are instance methods, so the module is meant to
# be mixed into a class with +include+.
module HttpTools
  # Parse the given user agent string.
  #
  # Code taken from http://gemtacular.com/gems/ParseUserAgent
  #
  # Returns a Hash with the keys :browser, :browser_version_major,
  # :browser_version_minor, :ostype, :os_version and :os. Values that
  # could not be determined (or are blank) are nil; all other values are
  # stripped strings. A "-" user agent is not treated specially.
  #
  # NOTE: the original port computed an "Opera 9" -> "Opera9" rewrite but
  # never used the result, so such tokens were (and still are) not
  # recognised as agent/version pairs. That dead statement was removed.
  def parse_user_agent(user_agent)
    browser, browser_version_major, browser_version_minor, ostype, os, os_version = nil

    # Every whitespace-separated token containing a slash is treated as an
    # Agent/version pair; the last one wins (MSIE is set later).
    user_agent.split(/\s+/).each do |token|
      next unless token =~ /\//
      name, version = token.split('/')
      # Tokens such as "Mozilla/" or "/" have no version part. Skip them
      # instead of calling String methods on nil.
      next if version.nil?

      browser = name
      if browser == 'Firefox'
        browser_version_major = version.slice(0, 3)
        browser_version_minor = version.sub(browser_version_major, '').sub('.', '')
      elsif browser == 'Safari'
        # Safari reports a build number; builds below 400 are mapped to
        # major version '1', everything else to '2'.
        browser_version_major = version.slice(0, 3).to_f < 400 ? '1' : '2'
      else
        browser_version_major = version.slice(0, 1)
      end
    end

    # Grab all of the properties (within parens) by stripping every token
    # that appears outside the parentheses from the full string.
    detail = user_agent
    user_agent.gsub(/\((.*)\)/, '').split(/\s/).each { |part| detail = detail.gsub(part, '') }
    detail = detail.gsub('(', '').gsub(')', '').lstrip
    properties = detail.split(/;\s+/)

    # Cycle through the properties to set known quantities.
    properties.each do |property|
      if property =~ /^Win/
        ostype = 'Windows'
        os = property
        nt_part = property.split(/ /, 2)[1]
        if nt_part =~ /^NT/
          nt_version = nt_part.split(/ /, 2)[1]
          if nt_version == '5'
            os_version = '2000'
          elsif nt_version == '5.1'
            os_version = 'XP'
          else
            os_version = nt_version
          end
        end
      end
      if property == 'Macintosh'
        ostype = 'Macintosh'
        os = property
      end
      if property =~ /OS X/
        ostype = 'Macintosh'
        os_version = 'OS X'
        os = property
      end
      if property =~ /^Linux/
        ostype = 'Linux'
        os = property
      end
      if property =~ /^MSIE/
        browser = 'MSIE'
        browser_version = property.gsub('MSIE ', '').lstrip
        browser_version_major, browser_version_minor = browser_version.split('.')
      end
    end

    result = {
      :browser => browser,
      :browser_version_major => browser_version_major,
      :browser_version_minor => browser_version_minor,
      :ostype => ostype,
      :os_version => os_version,
      :os => os,
    }
    # Normalise: blank values become nil, everything else is stripped.
    result.each do |key, value|
      result[key] = value.blank? ? nil : value.strip
    end
    result
  end

  # Parse a URI. If options[:prefix] is set then prepend it to the keys of
  # the hash that is returned.
  #
  # The returned Hash always has the (prefixed) keys scheme, host, port,
  # uri_path and domain. All values are nil when +uri_string+ is nil/false
  # or cannot be parsed. The domain is the last two dot-separated labels of
  # the host and stays nil when the host has no such suffix.
  #
  # The +options+ hash passed by the caller is not modified.
  def parse_uri(uri_string, options={})
    prefix = options[:prefix] || ''
    empty_hash = {
      "#{prefix}scheme".to_sym => nil,
      "#{prefix}host".to_sym => nil,
      "#{prefix}port".to_sym => nil,
      "#{prefix}uri_path".to_sym => nil,
      "#{prefix}domain".to_sym => nil
    }
    return empty_hash unless uri_string

    # Attempt to parse the URI; if it is not a valid URI, catch the problem
    # and return the all-nil hash.
    begin
      uri = URI.parse(uri_string)
      results = empty_hash.merge(
        "#{prefix}scheme".to_sym => uri.scheme,
        "#{prefix}host".to_sym => uri.host,
        "#{prefix}port".to_sym => uri.port,
        "#{prefix}uri_path".to_sym => uri.path
      )
      results["#{prefix}domain".to_sym] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
      results
    rescue
      empty_hash
    end
  end
end
@@ -0,0 +1,49 @@
1
+ module ETL #:nodoc:
2
+ module Parser #:nodoc:
3
# Parser which can parse the Apache Combined Log Format as defined at
# http://httpd.apache.org/docs/2.2/logs.html
class ApacheCombinedLogParser < ETL::Parser::Parser
  include HttpTools

  def initialize(source, options={})
    super
  end

  # Yield one Hash (see #parse) per line of every file matching the
  # source's file glob. Each file is closed once its lines are consumed.
  def each
    Dir.glob(file).each do |path|
      File.open(path) do |io|
        io.each_line do |line|
          yield parse(line)
        end
      end
    end
  end

  # Parse a single log line into a Hash.
  #
  # The Hash holds the raw fields (:ip_address, :identd, :user, :timestamp,
  # :request, :response_code, :bytes, :referrer, :user_agent), the request
  # split into :method and :path, the result of parse_user_agent for the
  # user agent, and the result of parse_uri for the referrer with the
  # 'referrer_' prefix. Any value equal to '-' is replaced with nil.
  #
  # A line that does not match the format yields a Hash whose raw fields
  # are all nil instead of raising.
  #
  # The timestamp format string does not include the UTC offset, and the
  # Time is built with Time.mktime from the parsed date/time parts only.
  def parse(line)
    # example line: 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"
    line =~ /^(\S+)\s(\S+)\s(\S+)\s\[([^\]]*)\]\s"([^"]*)"\s(\d*)\s(\d*)\s"([^"]*)"\s"([^"]*)"$/
    fields = {
      :ip_address => $1,
      :identd => $2,
      :user => $3,
      :timestamp => $4,
      :request => $5,
      :response_code => $6,
      :bytes => $7,
      :referrer => $8,
      :user_agent => $9,
    }
    d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
    fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?

    # The request is nil when the line did not match; avoid nil.split.
    request_parts = fields[:request].nil? ? [] : fields[:request].split(/\s/)
    fields[:method], fields[:path] = request_parts

    fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
    fields.merge!(parse_uri(fields[:referrer], :prefix => 'referrer_'))

    fields.each do |key, value|
      fields[key] = nil if value == '-'
    end
    fields
  end

end
48
+ end
49
+ end
@@ -0,0 +1,74 @@
1
+ module ETL #:nodoc:
2
+ module Parser #:nodoc:
3
# Parses delimited files using FasterCSV.
class DelimitedParser < ETL::Parser::Parser
  # Initialize the parser
  # * <tt>source</tt>: The Source object
  # * <tt>options</tt>: Hash of options for the parser, defaults to an empty hash.
  #   The options are handed to FasterCSV.foreach unchanged.
  def initialize(source, options={})
    super
    configure
  end

  # Yields each row as a Hash keyed by the configured field names.
  #
  # For every file matching the source's file glob, the first
  # source.skip_lines rows are skipped. Each remaining row is validated
  # against the field definition before it is yielded.
  def each
    Dir.glob(file).each do |path|
      ETL::Engine.logger.debug "parsing #{path}"
      line = 0
      lines_skipped = 0
      FasterCSV.foreach(path, options) do |raw_row|
        if lines_skipped < source.skip_lines
          ETL::Engine.logger.debug "skipping line"
          lines_skipped += 1
          next
        end
        line += 1
        row = {}
        validate_row(raw_row, line, path)
        raw_row.each_with_index do |value, index|
          f = fields[index]
          row[f.name] = value
        end
        yield row
      end
    end
  end

  # Get an array of defined fields
  def fields
    @fields ||= []
  end

  private
  # Raise MismatchError when the row's column count differs from the
  # number of defined fields. +line+ counts rows after the skipped ones.
  def validate_row(row, line, file)
    ETL::Engine.logger.debug "validating line #{line} in file #{file}"
    if row.length != fields.length
      # raise_with_info takes (error, message, file, line); passing the
      # last two the other way round swaps them in the error message.
      raise_with_info( MismatchError,
        "The number of columns from the source (#{row.length}) does not match the number of columns in the definition (#{fields.length})",
        file, line
      )
    end
  end

  # Build the field list from source.definition. Each entry must be a
  # Symbol (the field name) or a Hash with a :name key.
  def configure
    source.definition.each do |options|
      case options
      when Symbol
        fields << Field.new(options)
      when Hash
        fields << Field.new(options[:name])
      else
        raise DefinitionError, "Each field definition must either be a symbol or a hash"
      end
    end
  end

  class Field #:nodoc:
    attr_reader :name
    def initialize(name)
      @name = name
    end
  end
end
73
+ end
74
+ end
@@ -0,0 +1,65 @@
1
+ module ETL #:nodoc:
2
+ module Parser #:nodoc:
3
# Parser for fixed-width files
class FixedWidthParser < ETL::Parser::Parser
  # Initialize the parser
  # * <tt>source</tt>: The source object
  # * <tt>options</tt>: Parser options Hash
  def initialize(source, options={})
    super
    configure
  end

  # Yields each row as a Hash keyed by field name.
  #
  # For every file matching the source's file glob, the first
  # source.skip_lines lines are skipped. Field values are stripped. A
  # field that starts beyond the end of a line yields nil for that field.
  # Each file is closed once its lines are consumed.
  def each
    Dir.glob(file).each do |path|
      # Counted per file and outside the line loop, so that whole lines
      # (not the first fields of every line) are skipped.
      lines_skipped = 0
      File.open(path) do |io|
        io.each do |line|
          if lines_skipped < source.skip_lines
            lines_skipped += 1
            next
          end
          row = {}
          fields.each do |name, f|
            # TODO make strip optional?
            value = line[f.field_start, f.field_length]
            row[name] = value.nil? ? nil : value.strip
          end
          yield row
        end
      end
    end
  end

  # Return a map of defined fields
  def fields
    @fields ||= {}
  end

  private
  # Build one FixedWidthField per entry of source.definition, using the
  # entry's :name, :start, :end and :length options.
  def configure
    source.definition.each do |field, options|
      fields[field] = FixedWidthField.new(
        options[:name], options[:start], options[:end], options[:length]
      )
    end
  end
end
46
+
47
# Describes one column of a fixed-width record.
class FixedWidthField #:nodoc:
  attr_reader :name, :field_start, :field_end, :field_length

  # Initialize the field.
  #
  # +field_start+ is given 1-based and stored 0-based. Either +field_end+
  # or +field_length+ must be supplied; the other value is derived from
  # it. +field_end+ takes precedence when both are given. Raises
  # DefinitionError when neither is supplied.
  def initialize(name, field_start, field_end=nil, field_length=nil)
    @name = name
    @field_start = field_start - 1
    unless field_end || field_length
      raise DefinitionError, "Either field_end or field_length required"
    end

    if field_end
      @field_end = field_end
      @field_length = field_end - @field_start
    else
      @field_length = field_length
      @field_end = @field_start + field_length
    end
  end
end
64
+ end
65
+ end
@@ -0,0 +1,41 @@
1
+ module ETL #:nodoc:
2
+ module Parser #:nodoc:
3
# Parent class for all parsers. Subclasses extend this class and provide
# an +each+ method, which should yield every row of the source data as a
# Hash. Enumerable is mixed in on top of that +each+.
class Parser
  include Enumerable

  # Convert the name (string or symbol) to a parser class.
  #
  # Example:
  # <tt>class_for_name(:fixed_width)</tt> returns a FixedWidthParser class
  def self.class_for_name(name)
    class_name = "#{name.to_s.camelize}Parser"
    ETL::Parser.const_get(class_name)
  end

  # The Source object for the data
  attr_reader :source

  # Options Hash for the parser
  attr_reader :options

  # Store the source and the options; a nil +options+ becomes an empty Hash.
  def initialize(source, options={})
    @source = source
    @options = options || {}
  end

  protected

  # Pathname of the configured source file (source.configuration[:file]).
  # A relative path is resolved against the directory of the control file.
  def file
    configured = Pathname.new(source.configuration[:file])
    return configured if configured.absolute?

    control_dir = Pathname.new(File.dirname(source.control.file))
    control_dir + configured
  end

  # Raise +error+ with +message+ followed by the line and file it refers to.
  def raise_with_info(error, message, file, line)
    location = "(line #{line} in #{file})"
    raise error, "#{message} #{location}"
  end
end
40
+ end
41
+ end
@@ -0,0 +1,218 @@
1
+ require 'rexml/parsers/sax2parser'
2
+ require 'rexml/sax2listener'
3
+
4
+ module ETL #:nodoc:
5
+ module Parser #:nodoc:
6
# ETL parser implementation which uses SAX to parse XML files.
class SaxParser < ETL::Parser::Parser

  # The write trigger causes whatever values are currently specified for the row to be returned.
  # After returning the values will not be cleared, thus allowing for values which are assigned
  # higher in the XML tree to remain in memory.
  attr_accessor :write_trigger

  # Initialize the parser
  # * <tt>source</tt>: The Source object
  # * <tt>options</tt>: Parser options Hash
  def initialize(source, options={})
    super
    configure
  end

  # Returns each row.
  #
  # Every file matching the source's file glob is run through a REXML SAX2
  # parser with a Listener that calls the given block. The file handle is
  # closed as soon as parsing of that file finishes (or raises).
  def each(&block)
    Dir.glob(file).each do |path|
      File.open(path) do |io|
        parser = REXML::Parsers::SAX2Parser.new(io)
        listener = Listener.new(self, &block)
        parser.listen(listener)
        parser.parse
      end
    end
  end

  # Get an array of Field objects
  def fields
    @fields ||= []
  end

  private
  # Read the write trigger and the name => path field mapping from
  # source.definition, parsing every path with XPath::Path.parse.
  def configure
    self.write_trigger = source.definition[:write_trigger]
    # map paths to field names
    source.definition[:fields].each do |name, path|
      fields << Field.new(name, XPath::Path.parse(path))
    end
  end

  # Class representing a field to be loaded from the source
  class Field
    # The name of the field
    attr_reader :name
    # The XPath-like path to the field in the XML document
    attr_reader :path

    def initialize(name, path) #:nodoc
      @name = name
      @path = path
    end
  end
end
61
+
62
# SAX2 listener that collects field values into a row Hash while tracking
# the current element path, and calls the supplied block with a copy of
# the row whenever the path matches the parser's write trigger.
class Listener #:nodoc:
  include REXML::SAX2Listener

  # * <tt>parser</tt>: object providing +fields+ and +write_trigger+
  # * <tt>block</tt>: called with a clone of the current row
  def initialize(parser, &block)
    @parser = parser
    @row = {}
    @value = nil
    @proc = Proc.new(&block)
  end

  # Append CDATA text to the current element's value. The buffer is
  # created on demand, so CDATA may be the first content of an element.
  def cdata(text)
    @value ||= String.new
    @value << text
  end

  # Append stripped character data to the current element's value.
  # Whitespace-only text is ignored.
  def characters(text)
    text = text.strip
    return if text == ''
    @value ||= String.new
    @value << text
  end

  # Start with an empty path for every document.
  def start_document
    @path = XPath::Path.new
  end

  def end_document

  end

  # Push the element onto the current path and store the value of any
  # attribute-type field whose path matches the current path.
  def start_element(uri, localname, qname, attributes)
    element = XPath::Element.new(localname, attributes)
    @path.elements << element

    @parser.fields.each do |field|
      if @path.match?(field.path)
        if field.path.is_attribute?
          @row[field.name] = element.attributes[field.path.attribute]
        end
      end
    end
  end

  # Store the collected text for any non-attribute field whose path matches
  # the current path, fire the write trigger if it matches, then reset the
  # value buffer and pop the element off the path.
  def end_element(uri, localname, qname)
    @parser.fields.each do |field|
      if @path.match?(field.path)
        if !field.path.is_attribute?
          @row[field.name] = @value
        end
      end
    end

    if @path.match?(@parser.write_trigger)
      # The row is cloned so later changes do not affect the yielded Hash.
      @proc.call(@row.clone)
    end

    @value = nil
    @path.elements.pop
  end

  # Remember the last position reported by the parser.
  def progress(position)
    @position = position
  end
end
128
+
129
# Namespace for the classes that implement the XPath-like filtering used
# by the SAX parser.
module XPath #:nodoc:
  # An ordered list of Element objects, written as "a/b[attr=value]/c".
  class Path #:nodoc:
    # The elements in the path
    attr_accessor :elements

    # Start with an empty path.
    def initialize
      @elements = []
    end

    # String form: the elements' string forms joined with "/".
    def to_s
      @elements.map { |e| e.to_s }.join("/")
    end

    # True if the last element of the path carries at least one attribute.
    def is_attribute?
      elements.last.attributes.length > 0
    end

    # Name of the attribute referenced by the last element of this path,
    # or nil if the last element does not reference an attribute.
    #
    # Warning: the path should reference a single attribute only; with
    # several attributes the first key of the attributes Hash is returned.
    def attribute
      is_attribute? ? elements.last.attributes.keys.first : nil
    end

    # Return true if this path matches the given path (a string or a Path).
    #
    # Both paths must have the same number of elements; each element must
    # have the same name, and every attribute pattern of the given path
    # must match the corresponding attribute value of this path. The first
    # mismatch makes the method return false.
    def match?(s)
      other = Path.parse(s)
      return false if other.elements.length != elements.length

      elements.zip(other.elements).each do |element, expected|
        return false if expected.nil?
        return false unless element.name == expected.name
        expected.attributes.each do |key, pattern|
          return false unless element.attributes[key] =~ pattern
        end
      end
      true
    end

    # Parse the string into an XPath::Path object. A Path argument is
    # returned unchanged.
    #
    # Each "/"-separated part may end in "[key=value,...]"; every value is
    # compiled into a Regexp, and a key without a value matches ".*".
    def self.parse(s)
      return s if s.is_a?(Path)

      result = Path.new
      s.split('/').each do |part|
        attributes = {}
        # Strip the bracket section from the part; the bracket contents
        # stay available as the second capture of the last match.
        part.gsub!(/(.*)\[(.*)\]/, '\1')
        bracket = Regexp.last_match(2)
        unless bracket.nil?
          bracket.split(",").each do |pair|
            key, value = pair.split("=")
            attributes[key] = Regexp.new(value.nil? ? ".*" : value)
          end
        end
        result.elements << Element.new(part, attributes)
      end
      result
    end
  end

  # A single path step: an element name plus a Hash of attributes.
  class Element #:nodoc
    attr_reader :name
    attr_reader :attributes

    def initialize(name, attributes={})
      @name = name
      @attributes = attributes
    end

    # "name" or "name[key=value,...]"; Regexp values are shown by source.
    def to_s
      return "#{name}" if @attributes.empty?

      pairs = @attributes.collect do |key, value|
        shown = value.is_a?(Regexp) ? value.source : value
        "#{key}=#{shown}"
      end
      "#{name}" + "[" + pairs.join(",") + "]"
    end
  end
end
217
+ end
218
+ end