RubyGems - parsey - Versions diffs - 0.1.3 → 0.2.0 - Mend

parsey 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

data/README.markdown CHANGED

@@ -1,12 +1,8 @@
 # parsey
-Parsey is a very simple class to match a string with a pattern and retrieve data from it.
-It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
-and then that is matched to the string given.
+Parsey is a simple class to match a string with a pattern and retrieve data from it. It takes a string, a pattern, and a hash of regular expressions (as strings). The pattern is filled with the regular expressions and then that is matched to the string given.
-The pattern uses {} to surround the name of the regex it should be replaced with. You can
-also use <> to surround parts of the pattern that are optional, though these obviously
-must be nested properly.
+The pattern uses {} to surround the name of the regex it should be replaced with. You can also use <> to surround parts of the pattern that are optional, though these obviously must be nested properly.
 ## Install
@@ -22,7 +18,7 @@ must be nested properly.
      #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
     Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
-     #=> {"folder"=>nil, "file-name"=>"my file", "ext"=>"txt"}
+     #=> {"file-name"=>"my file", "ext"=>"txt"}
 ## Copyright

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.3
1	+ 0.2.0

data/lib/parsey.rb CHANGED

@@ -1,6 +1,8 @@
-# Parsey is a very simple class to match a string with a pattern and retrieve data from it.
-# It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
-# and then that is matched to the string given.
+require 'strscan'
+# Parsey is a simple class to match a string with a pattern and retrieve data from it. It
+# takes a string, a pattern, and a hash of regular expressions. The pattern is filled with the
+# regular expressions and then that is matched to the string given.
 #
 # The pattern uses {} to surround the name of the regex it should be replaced with. You can
 # also use <> to surround parts of the pattern that are optional, though these obviously
@@ -16,11 +18,18 @@
 #     #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
 #
 #   Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
-#     #=> {"folder"=>nil, "file-name"=>"my file", "ext"=>"txt"}
+#     #=> {"file-name"=>"my file", "ext"=>"txt"}
 #
 class Parsey
-  attr_accessor :to_parse, :pattern, :partials, :data
+  class ParseError < StandardError; end
+  attr_accessor :to_parse, :pattern, :partials, :scanners
+  # Depth keeps track of how many levels the optional blocks go down, so that the scanner
+  # to use can be properly tracked. Each level of recursion needs a new scanner object
+  # to refer to or it will just clear the text that was stored.
+  attr_accessor :depth
   # Creates a new Parsey instance.
   #
@@ -36,95 +45,272 @@ class Parsey
     @pattern  = pattern
     @partials = partials
-    @data = {}
+    @scanners = []
+    @depth = -1
   end
-  # Runs through +pattern+ and replaces each of the keywords with the
-  # correct regex from +partials+. It then adds '()?' round any parts of
-  # the pattern marked optional. And turns the final string into a regex.
+  # This is a convenience method to allow you to easily parse something
+  # in just one line
+  #
+  # @param [String] to_parse
+  #   the string which is to be parsed
+  # @param [String] pattern
+  #   for the string to match
+  # @param [Hash{String => String}] partials
+  #   the regex patterns (as strings) to use when matching
+  #
+  # @return [Hash{String => String}]
+  #   the data retrieved from +to_parse+
   #
-  # @return [Regex]
-  #   the regular expression to match against when parsing
+  def self.parse(to_parse, pattern, partials)
+    a = Parsey.new(to_parse, pattern, partials)
+    a.parse
+  end
+  # This is a front for r_place so that a regex is returned as expected
   #
+  # @param [Array] pat the pattern to turn into a regular expression
+  # @return [Regexp] the regex that will be used for parsing
+  # @see r_place
   def regex
-    m = @pattern.gsub(/\{([a-z-]+)\}/) do
-      @partials[$1]
-    end
+    Regexp.new(r_place(scan))
+  end
+  # @return [StringScanner] the current scanner to use
+  def scanner
+    @scanners[@depth]
+  end
+  # Finds matches from +to_parse+ using #regex. Then uses this data
+  # and the pattern created with #scan to match the data with names.
+  #
+  # @return [Hash{String => String}]
+  #   the data taken from +to_parse+
+  def parse
+    match = @to_parse.match(self.regex).captures
+    data = {}
-    # replace optional '<stuff>'
-    m.gsub!(/<(.+)>/) do
-      "(#{$1})?"
+    self.scan.flatten.each_with_type_indexed do |t, c, i|
+      if (t == :block) && (match[i] != nil)
+        data[c] = match[i]
+      end
     end
-    Regexp.new(m)
+    data
   end
-  # Gets the order of the different tags within the pattern. It inserts nil
-  # when it encounters an optional section so that it can easily be skipped
-  # during parsing.
+  # Need to reset scanners after every full run, so this provides a front
+  # for r_scan, which resets +scanners+ and still returns the correct value.
   #
-  # @return [Array]
-  #   the order in which the tags appear in the +pattern+
+  # @see #r_scan
+  # @return [ScanArray]
+  def scan
+    r = self.r_scan(@pattern)
+    @scanners =[]
+    r
+  end
+  # Creates a new StringScanner, then scans for blocks, optionals or text
+  # and adds the result to +parsed+ until it reaches the end of +str+.
   #
-  def order
-    if @pattern =~ /<(.+)>/
-      parts = @pattern.dup.split('<')
-      parts.insert(1, nil)
-      parts.collect! {|i|
-        i.split('>') unless i.nil?
-      }.flatten!
-      parts.collect! {|i|
-        i.split('}') unless i.nil?
-      }.flatten!
-      parts.collect! {|i|
-        i.gsub!(/[^a-zA-Z0-9_-]/, '') unless i.nil?
-      }
-      parts.delete_if {|i| i == ''}
-      return parts
-    else
-      parts = []
-      @pattern.gsub(/\{([a-z-]+)\}/) do
-        parts << $1
-      end
-      return parts
+  # @param [String] str the string to scan through
+  # @return [ScanArray]
+  def r_scan(str)
+    parsed = ScanArray.new
+    @depth += 1
+    @scanners[@depth] = StringScanner.new(str)
+    until self.scanner.eos?
+      a = scan_blocks ||  a = scan_optionals ||  a = scan_text
+      parsed << a
     end
+    @depth -= 1
+    parsed
   end
-  # This does the parsing of +to_parse+ using +regex+. It fills the hash
-  # +data+ using +order+ to match the data up with the correct name.
+  # Finds next {...} in the StringScanner, and checks that it is closed.
   #
-  # @return [Hash{String => String}]
-  #   the data retrieved from +to_parse+
+  # @return [Array]
+  #   an array of the form [:block, ...]
+  def scan_blocks
+    return unless self.scanner.scan(/\{/)
+    content = scan_until(:block)
+    raise ParseError unless self.scanner.scan(/\}/) # no closing block
+    raise NoPartialError unless @partials[content]
+    [:block, content]
+  end
+  # Finds next <...> in the StringScanner, and checks that it is closed.
+  # Then scans the contents of the optional block.
   #
-  def parse
-    @to_parse.match( self.regex ).captures.each_with_index do |item, i|
-      unless self.order[i].nil?
-        @data[ self.order[i] ] = item
-      end
-    end
-    @data
+  # @return [Array]
+  #   an array of the form [:optional, [...]]
+  def scan_optionals
+    return unless self.scanner.scan(/</)
+    content = scan_until(:optional)
+    raise ParseError unless self.scanner.scan(/>/) # no closing block
+    [:optional, r_scan(content)]
   end
-  # This is a convenience method to allow you to easily parse something
-  # in just one go!
+  # Finds plain text, and checks whether there are any blocks left.
   #
-  # @param [String] to_parse
-  #   the string which is to be parsed
-  # @param [String] pattern
-  #   for the string to match
-  # @param [Hash{String => String}] partials
-  #   the regex patterns (as strings) to use when matching
+  # @return [Array]
+  #   text before next block, or rest of text in the form [:text, ...]
+  def scan_text
+    text = scan_until(:open)
+    if text.nil?
+      text = self.scanner.rest
+      self.scanner.clear
+    end
+    [:text, text]
+  end
+  # Scans the string until a tag is found of the type given.
   #
-  # @return [Hash{String => String}]
-  #   the data retrieved from +to_parse+
+  # @param [Symbol] type of tag to look for.
+  #   +:block+ for a closing block tag (+}+),
+  #   +:optional+ for a closing optional tag (+>+),
+  #   +:open+ for an opening tag (+{+ or +<+).
+  # @return [String, nil]
+  #   the text before the tag, or nil if no match found
+  def scan_until(type)
+    case type
+    when :block
+      regex = /\}/
+    when :optional
+      regex = />/
+    when :open
+      regex = /(\{|<)/
+    end
+    pos = self.scanner.pos
+    if self.scanner.scan_until(regex)
+      self.scanner.pos -= self.scanner.matched.size
+      self.scanner.pre_match[pos..-1]
+    end
+  end
+  # Puts the regexps in the correct place, but returns a string so it can
+  # still work recursively
   #
-  def self.parse(to_parse, pattern, partials)
-    a = Parsey.new(to_parse, pattern, partials)
-    a.parse
+  # @param [ScanArray] pat the pattern to turn into a regular expression
+  # @return [String] the regular expression as a string
+  def r_place(pat)
+    str = ''
+    pat.each_with_type do |t, c|
+      case t
+      when :block
+        str << @partials[c]
+      when :text
+        str << c
+      when :optional
+        str << "(#{r_place(c)})?"
+      end
+    end
+    str
   end
+  # ScanArray is an array of tokens created when scanning the pattern.
+  # It looks like this:
+  #   [[:block, 'what-'], [:optional, [[:text, "hi-"]]], [:text, "oh"]]
+  #
+  class ScanArray < Array
+    # @see #flatten
+    def flatten!
+      self.replace(self.flatten)
+    end
+    # Removes all :text nodes from the array and moves the contents of each :optional
+    # node into the main array, leaving an [:optional, nil] marker in its place
+    #
+    # @return [Array]
+    #
+    # @example
+    #
+    #   sa = ScanArray.new([[:text, 'hey-'],
+    #                       [:optional,
+    #                         [[:block, '([a-z]+)'],
+    #                          [:text, '-what']]
+    #                      ]])
+    #
+    #   sa.flatten
+    #     #=> [[:optional, nil], [:block, "([a-z]+)"]]
+    #
+    def flatten
+      # Flatten the array with Array#flatten before starting
+      flat = super
+      indexes = []
+      flat.each_with_index do |v, i|
+        if v == :optional
+          indexes << i
+        end
+      end
+      # Need to start from the back so as not to alter the indexes of the
+      # other items when inserting
+      indexes.reverse.each do |i|
+        flat.insert(i+1, nil)
+      end
+      flat.reverse!
+      r = ScanArray.new
+      while flat.size > 0
+        r << [flat.pop, flat.pop]
+      end
+      r.delete_if {|i| i[0] == :text}
+      r
+    end
+    # Loops through the types and contents of each tag separately, passing them
+    # to the block given.
+    #
+    # @return [ScanArray] returns self
+    # @yield [Symbol, Object] gives the type and content of each block in turn
+    #
+    # @example
+    #
+    #   sa = ScanArray.new([[:text, 'hey-'],
+    #                       [:optional,
+    #                         [[:block, '([a-z]+)'],
+    #                          [:text, '-what']]
+    #                      ]])
+    #
+    #   sa.each_with_type do |type, content|
+    #     puts "#{type} -> #{content}"
+    #   end
+    #   #=> text -> hey-
+    #   #=> optional -> [[:block, "([a-z]+)"], [:text, "-what"]]
+    #
+    def each_with_type(&blck)
+      ts = self.collect {|i| i[0]}
+      cs = self.collect {|i| i[1]}
+      (0...ts.size).each do |i|
+        yield(ts[i], cs[i])
+      end
+      self
+    end
+    # @see #each_with_type
+    # @yield [Symbol, Object, Integer] gives the type, content and index of each block in turn
+    def each_with_type_indexed(&blck)
+      ts = self.collect {|i| i[0]}
+      cs = self.collect {|i| i[1]}
+      (0...ts.size).each do |i|
+        yield(ts[i], cs[i], i)
+      end
+      self
+    end
+  end
 end

data/parsey.gemspec CHANGED

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{parsey}
-  s.version = "0.1.3"
+  s.version = "0.2.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Joshua Hawxwell"]
-  s.date = %q{2010-07-16}
+  s.date = %q{2010-07-22}
   s.description = %q{Parsey matches a string with a pattern to retrieve data from it.}
   s.email = %q{m@hawx.me}
   s.extra_rdoc_files = [

data/test/test_parsey.rb CHANGED

@@ -14,16 +14,18 @@ class TestParsey < Test::Unit::TestCase
     assert_equal Regexp.new("(f)?\/n.e"), t.regex
   end
-  should "create correct order" do
+  should "scan correctly" do
     partials = {'folder' => 'f', 'name' => 'n', 'ext' => 'e'}
     t = Parsey.new('', '<{folder}/>{name}.{ext}', partials)
-    assert_equal [nil, 'folder', 'name', 'ext'], t.order
+    r = [[ :optional, [[:block, "folder"], [:text, "/"]] ], [:block, "name"], [:text, "."], [:block, "ext"]]
+    assert_equal r, t.scan
   end
   should "create correct order when optional is in the middle" do
     partials = {'folder' => 'folder', 'name' => 'name', 'ext' => 'ext'}
     t = Parsey.new('', '{folder}/<{name}>.{ext}', partials)
-    assert_equal ['folder', nil, 'name', 'ext'], t.order
+    r = [[:block, "folder"], [:text, "/"], [:optional, [[:block, "name"]]], [:text, "."], [:block, "ext"]]
+    assert_equal r, t.scan
   end
   should "parse properly" do
@@ -33,4 +35,55 @@ class TestParsey < Test::Unit::TestCase
     assert_equal hash, t.parse
   end
+  should "parse long patterns properly" do
+    partials = {'word' => '([a-z]+)',
+                'number' => '([0-9]+)',
+                'date' => '(\d{4}-\d{2}-\d{2})',
+                'time' => '(\d{2}:\d{2})',
+                'person' => '(John|Dave|Luke|Josh)'}
+    pattern = 'Hello my name is {person}, I was born on {date} at {time}. I am {number} years old, and my favourite animal is a {word}.'
+    string = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
+    hash = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
+    assert_equal hash, Parsey.parse(string, pattern, partials)
+  end
+  should "parse multiple optionals correctly" do
+    partials = {'word' => '([a-z]+)',
+                'number' => '([0-9]+)',
+                'date' => '(\d{4}-\d{2}-\d{2})',
+                'time' => '(\d{2}:\d{2})',
+                'person' => '(John|Dave|Luke|Josh)'}
+    pattern = 'Hello my name is {person}, I was born on {date}< at {time}>. I am {number} years old<, and my favourite animal is a {word}>.'
+    string1 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
+    hash1 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
+    string2 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old.'
+    hash2 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17'}
+    string3 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old, and my favourite animal is a shark.'
+    hash3 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17', 'word' => 'shark'}
+    string4 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old.'
+    hash4 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17'}
+    assert_equal hash1, Parsey.parse(string1, pattern, partials)
+    assert_equal hash2, Parsey.parse(string2, pattern, partials)
+    assert_equal hash3, Parsey.parse(string3, pattern, partials)
+    assert_equal hash4, Parsey.parse(string4, pattern, partials)
+  end
+  should "raise an error when blocks not closed" do
+    assert_raise Parsey::ParseError do
+      Parsey.parse('what', '{question', {'question' => '([a-z ]+\?)'})
+    end
+  end
+  should "raise an error when optional not closed" do
+    assert_raise Parsey::ParseError do
+      Parsey.parse('hmm', '<{sound}', {'sound' => '(hmm|boo)'})
+    end
+  end
 end

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: parsey
 version: !ruby/object:Gem::Version
-  hash: 29
+  hash: 23
   prerelease: false
   segments:
   - 0
-  - 1
-  - 3
-  version: 0.1.3
+  - 2
+  - 0
+  version: 0.2.0
 platform: ruby
 authors:
 - Joshua Hawxwell
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-07-16 00:00:00 +01:00
+date: 2010-07-22 00:00:00 +01:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency