parse_me 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rvmrc ADDED
@@ -0,0 +1,48 @@
1
#!/usr/bin/env bash

# This is an RVM Project .rvmrc file, used to automatically load the ruby
# development environment upon cd'ing into the directory.
# The script uses `return`, so it is written to be sourced, not executed.

# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
# Only full ruby name is supported here, for short names use:
#     echo "rvm use 1.9.3" > .rvmrc
environment_id="ruby-1.9.3@parse_me"

# Uncomment the following lines if you want to verify rvm version per project
# rvmrc_rvm_version="1.18.18 (version)" # 1.10.1 seems to be a safe start
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
#   echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
#   return 1
# }

# First we attempt to load the desired environment directly from the environment
# file. This is very fast and efficient compared to running through the entire
# CLI and selector. If you want feedback on which environment was used then
# insert the word 'use' after --create as this triggers verbose mode.
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
then
  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
  # The after_use hook is optional; `|| true` keeps a missing hook from
  # making this branch report failure.
  [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
    \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
else
  # If the environment file has not yet been created, use the RVM CLI to select.
  rvm --create "$environment_id" || {
    echo "Failed to create RVM environment '${environment_id}'."
    return 1
  }
fi

# If you use bundler, this might be useful to you:
# bundler is installed when a Gemfile exists and `bundle` is either missing or
# resolves to the path under RVM's own bin directory.
# The path is quoted, matched as a fixed string, and uses the same
# ${rvm_path:-$HOME/.rvm} fallback as the rest of this file, so an unset
# rvm_path no longer turns the pattern into a bare "/bin/bundle".
if [[ -s Gemfile ]] && {
  ! builtin command -v bundle >/dev/null ||
  builtin command -v bundle | GREP_OPTIONS= \grep -F -- "${rvm_path:-$HOME/.rvm}/bin/bundle" >/dev/null
}
then
  printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
  gem install bundler
fi
if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
then
  # Hide the per-gem "Using ..." lines and the completion banner.
  bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
fi
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - "1.9.2"
4
+ - "1.9.3"
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in parse_me.gemspec
4
+ gemspec
5
+
6
+ gem "rspec"
7
+ gem "simplecov"
8
+ gem "rake"
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Serge Morales
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # ParseMe [![Build Status](https://travis-ci.org/sergelerator/parse_me.png?branch=master)](https://travis-ci.org/sergelerator/parse_me)
2
+
3
+ The `parse_me` gem aims to provide an easy-to-use plain text parser. It is currently in early development; the goal is to parse fixed-width and variable-width files with something similar to:
4
+
5
+ rules = {
6
+ name: { required: true, length: 50 },
7
+ last_name: { required: true, length: 50 },
8
+ age: { numeric: :integer },
9
+ email: { required: true, email: true},
10
+ last_seen: { date: "YYYY-MM-DD" }
11
+ }
12
+
13
+ options = {
14
+ row_delimiter: "\r\n", # Defaults to "\n"
15
+ field_delimiter: "|"
16
+ }
17
+
18
+ result = ParseMe::VarWidthParser.parse(source_string, rules, options)
19
+ result.each do |record|
20
+ record.valid? # => Runs the validations specified in rules, returns true if all pass
21
+ record.attributes # => Returns a Hash with the parsed attributes, using rules' keys
22
+ record.name # => Returns the name attribute
23
+ record.errors # => Returns a collection of the errors encountered during validation
24
+ end
25
+
26
+ ## Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ gem 'parse_me'
31
+
32
+ And then execute:
33
+
34
+ $ bundle
35
+
36
+ Or install it yourself as:
37
+
38
+ $ gem install parse_me
39
+
40
+ ## Usage
41
+
42
+ Usage will be documented in the first alpha release.
43
+
44
+ ## Contributing
45
+
46
+ 1. Fork it
47
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
48
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
49
+ 4. Push to the branch (`git push origin my-new-feature`)
50
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+
3
namespace :spec do
  desc "Runs all the specs"
  task :all do
    # `sh` (Rake's FileUtils extension) fails the task when the command exits
    # non-zero, so a failing spec run is no longer reported as success.
    sh "bundle exec rspec"
  end
end

# Running plain `rake` runs the whole spec suite.
task default: 'spec:all'
@@ -0,0 +1,11 @@
1
+ require 'parse_me/parsed_object'
2
+ require 'parse_me/input_splitting'
3
+ require 'parse_me/input_splitting/fixed'
4
+ require 'parse_me/input_splitting/var'
5
+
6
module ParseMe
  # Parser for fixed-width input.
  #
  # InputSplitting::Fixed is mixed in twice on purpose: `include` makes its
  # methods available on instances, `extend` makes the same methods available
  # on the class itself (e.g. FixedWidthParser.parse).
  class FixedWidthParser
    include InputSplitting::Fixed
    extend InputSplitting::Fixed
  end
end
@@ -0,0 +1,51 @@
1
module ParseMe
  module InputSplitting
    module Fixed
      # Row splitting and decoding helpers for fixed-width input.
      #
      # The methods read @rules, @source and @options from the receiver and
      # call the label helpers (labeled_rows?, label_length, valid_row_labels?,
      # length_rules_for, label_for, labeled_member), which must be available
      # on the same receiver.
      #
      # Nothing is memoized here: the receiver can be a long-lived object
      # (the parser class itself when the module is `extend`ed), and cached
      # values would leak from one parse call into the next.
      module Base
        # Boolean check, validates the presence of a length rule for every
        # attribute in @rules (and that row labels are valid).
        # Returns a falsy value when @rules is not set.
        def valid_length_rules?
          @rules && get_length_rules.all? && valid_row_labels?
        end

        # Maps the length rules to an array. With labeled rows, the rule sets
        # of every label are flattened into one list.
        def get_length_rules
          rule_sets = labeled_rows? ? @rules.values.flat_map(&:values) : @rules.values
          rule_sets.map { |rule| rule[:length] }
        end

        # Returns the fixed width of a row: the sum of all attribute lengths
        # plus the label length.
        #
        # With labeled rows only the first label's rule set is measured.
        # NOTE(review): this assumes every labeled rule set adds up to the same
        # width; nothing here verifies it.
        #
        # Raises ArgumentError when the length rules are invalid.
        def file_width
          unless valid_length_rules?
            raise ArgumentError, "File width can't be properly calculated if length rules are invalid"
          end

          rule_sets = labeled_rows? ? @rules.first[1].values : @rules.values
          rule_sets.map { |rule| rule[:length] }.reduce(0, :+) + label_length
        end

        # Creates the template string used to unpack a record, e.g. "A3A2".
        # For labeled rows the template starts with a field for the label.
        def unpack_pattern(label = nil)
          label_field = labeled_rows? ? "A#{label_length}" : ''
          label_field + length_rules_for(label).map { |length| "A#{length}" }.join
        end

        # Splits the input into rows of file_width characters.
        #
        # The configured :row_delimiter is stripped first ("\n" when the option
        # is absent), so inputs using e.g. "\r\n" no longer leave stray "\r"
        # characters that shift every following row.
        def split_input
          delimiter = (@options && @options[:row_delimiter]) || "\n"
          @source.gsub(delimiter, '').scan(/.{#{file_width}}/)
        end

        # Gets the list of attributes specified in @rules. The optional label
        # argument should be supplied when the labeled_rows option is set, in
        # which case the attributes of that label's rule set are returned.
        def attributes(label = nil)
          (labeled_rows? ? @rules[label] : @rules).keys
        end

        # Decodes the supplied string and returns the attributes hash.
        def member(str)
          values = str.unpack(unpack_pattern)
          Hash[attributes.zip(values)]
        end

        # Maps every row of the input to its attributes hash.
        def collection
          split_input.map do |row|
            labeled_rows? ? labeled_member(row, label_for(row)) : member(row)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module ParseMe
  module InputSplitting
    module Fixed
      # Helpers for inputs whose rows start with a label that selects the rule
      # set used to decode the rest of the row.
      #
      # The methods read @rules and @options from the receiver, and
      # labeled_member additionally calls unpack_pattern and attributes on it.
      #
      # Values are derived from @rules/@options on every call instead of being
      # cached with `||=`: the receiver may be reused for several parse calls
      # (it is the parser class itself when the module is `extend`ed) and a
      # cached value from an earlier call would be wrong for the next one.
      module Labels
        # Boolean check for the parse option `:labeled_rows`.
        def labeled_rows?
          @options[:labeled_rows]
        end

        # Boolean validation check, returns a truth value if all of the labels
        # specified in @rules are of the same length. Always true when rows
        # are not labeled.
        def valid_row_labels?
          return true unless labeled_rows?

          # labels are sorted by length, so equal extremes mean equal lengths.
          lengths = label_lengths
          lengths.first == lengths.last
        end

        # Returns an array with the length rules in a given @rules configuration hash.
        # If the label does not exist (or if the `labeled_rows?` switch is turned off)
        # this method falls back to fetching the rules of the root @rules hash.
        def length_rules_for(label)
          (@rules[label] || @rules).values.map { |rule| rule[:length] }
        end

        # Gets an array with the labels specified in @rules, as strings,
        # ordered from shortest to longest.
        def labels
          @rules.keys.map(&:to_s).sort_by(&:length)
        end

        # Returns an array with the lengths of the labels specified in @rules.
        def label_lengths
          labels.map(&:length)
        end

        # Returns the length of the labels used in the file: the shortest
        # label's length for labeled rows, 0 otherwise.
        def label_length
          labeled_rows? ? label_lengths.first : 0
        end

        # Returns the label for a given string (its first label_length characters).
        def label_for(str)
          str[0...label_length]
        end

        # Returns a Hash with the data from *str* formatted as an attribute list,
        # with the keys being attribute names, and the values being the attribute
        # values for that key. The label itself is stored under :__label.
        def labeled_member(str, label)
          values = str.unpack(unpack_pattern(label))
          values.shift # the first unpacked field is the label, not an attribute
          Hash[attributes(label).zip(values)].merge(__label: label)
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'parse_me/input_splitting/fixed/labels'
2
+ require 'parse_me/input_splitting/fixed/base'
3
+
4
module ParseMe
  module InputSplitting
    # Entry point for fixed-width parsing.
    #
    # Including or extending this module also pulls in the Base and Labels
    # helper modules, so `parse` finds them on the same receiver.
    module Fixed
      # When included, add the helpers as instance methods of +base+.
      def self.included(base)
        base.instance_eval do
          include Base
          include Labels
        end
      end

      # When extended, add the helpers as singleton methods of +base+.
      def self.extended(base)
        base.instance_eval do
          extend Base
          extend Labels
        end
      end

      # Parses +source+ according to +rules+ and returns an array with one
      # ParsedObject per row.
      #
      # source  - the raw input string.
      # rules   - Hash of rules; checked with valid_length_rules?.
      # options - Hash of options. :labeled_rows defaults to false and
      #           :row_delimiter to "\n". The hash passed in is not modified.
      #
      # Raises ArgumentError when the length rules are not valid.
      def parse(source, rules, options = {})
        # Copy before applying defaults so the caller's hash stays untouched.
        @options = options.dup
        @options[:labeled_rows] ||= false
        @options[:row_delimiter] ||= "\n"

        @rules = rules
        @source = source

        # The helper modules cache derived values in these instance variables.
        # The receiver outlives a single call (it is the class itself when
        # used as `Parser.parse`), so they must be cleared or a second call
        # would return rows, widths and labels from the first one.
        @length_rules = @file_width = @split_input = nil
        @labeled_rows = @labels = @label_lengths = @label_length = nil

        unless valid_length_rules?
          raise ArgumentError, 'Fixed size file input requires a length rule for every attribute'
        end

        collection.map { |record| ParsedObject.new(record, @rules) }
      end
    end
  end
end
File without changes
@@ -0,0 +1,7 @@
1
+ require 'parse_me/input_splitting/fixed'
2
+ require 'parse_me/input_splitting/var'
3
+
4
+ module ParseMe
5
+ module InputSplitting
6
+ end
7
+ end
@@ -0,0 +1,67 @@
1
+ require 'parse_me/transformations'
2
+ require 'parse_me/validations'
3
+
4
module ParseMe
  # One parsed record: its attribute values, the rules that apply to it and
  # the validation errors collected while building it.
  #
  # On construction every rule key that names a public method of
  # ParseMe::Validations is run as a validation, and afterwards every rule key
  # that names a public method of ParseMe::Transformations is run as a
  # transformation. Validations therefore see the untransformed values.
  #
  # Attributes can be read through #attributes or as methods
  # (record.name), see #method_missing.
  class ParsedObject
    include ParseMe::Validations
    include ParseMe::Transformations

    attr_reader :attributes, :record_label, :rules, :errors

    # attributes - Hash of attribute name => raw value. An optional :__label
    #              entry selects the rule set and is removed from the
    #              attributes. The hash is copied, the caller's object is
    #              neither stripped of its label nor transformed in place.
    # rules      - Hash of attribute name => rule set, or, for labeled
    #              records, Hash of label => such a Hash.
    def initialize(attributes, rules = {})
      @attributes = attributes.dup
      @record_label = @attributes.delete(:__label) if @attributes[:__label]
      @rules = record_label ? rules[record_label] : rules
      @errors = []

      apply_validations
      apply_transformations
    end

    # Exposes every attribute as a reader method. Presence of the key decides,
    # not the truthiness of the value, so attributes holding nil or false are
    # returned instead of raising NoMethodError.
    def method_missing(name, *args, &block)
      return @attributes[name] if @attributes && @attributes.key?(name)

      super
    end

    # Keeps respond_to? and method consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      (@attributes && @attributes.key?(name)) || super
    end

    # Names of the available transformations.
    def transformations
      @transformations ||= ParseMe::Transformations.public_instance_methods
    end

    # Names of the available validations.
    def validation_rules
      @validation_rules ||= ParseMe::Validations.public_instance_methods
    end

    # Runs every transformation named in the rules of each attribute.
    def apply_transformations
      rules.each do |attr, set|
        (set.keys & transformations).each do |transformation|
          transform(attr, transformation, set[transformation])
        end
      end
    end

    # Replaces the value of +attr+ with the result of +transformation+,
    # called with the current value and the rule value +val+.
    def transform(attr, transformation, val)
      @attributes[attr] = public_send(transformation, @attributes[attr], val)
    end

    # Runs every validation named in the rules of each attribute.
    def apply_validations
      rules.each do |attr, set|
        (set.keys & validation_rules).each do |validation|
          validate(attr, validation, set[validation])
        end
      end
    end

    # Records an error when +validation+, called with the value of +attr+ and
    # the rule value +val+, returns a falsy result.
    def validate(attr, validation, val)
      return if public_send(validation, @attributes[attr], val)

      add_error("#{validation} validation failed for '#{attr}' => '#{val}'")
    end

    # True when no validation error was recorded.
    def valid?
      errors.empty?
    end

    # Appends +msg+ to the error list.
    def add_error(msg)
      errors << msg
    end
  end
end
@@ -0,0 +1,9 @@
1
module ParseMe
  # Value transformations that can be requested by name in a rule set.
  # Each method receives the attribute value and the value given in the rule.
  module Transformations
    # Divides +input+ by 10 to the power of +rule_value+, i.e. shifts the
    # decimal point +rule_value+ places to the left ("12345" with a scale of
    # 2 becomes 123.45).
    #
    # input      - the raw value; converted with to_f.
    # rule_value - the scale; converted with to_i.
    #
    # Returns a Float.
    # Raises ArgumentError when +rule_value+ cannot be converted with to_i.
    def ghost_decimal_scale(input, rule_value)
      # The previous guard tested the result of to_i, which is always truthy,
      # and its raise called an undefined `Exception(...)` method.
      unless rule_value.respond_to?(:to_i)
        raise ArgumentError, "ghost_decimal_scale should be able to be converted to int"
      end

      input.to_f / (10**rule_value.to_i)
    end
  end
end
@@ -0,0 +1,11 @@
1
module ParseMe
  # Validations that can be requested by name in a rule set. Each method
  # receives the attribute value and the value given in the rule and returns
  # true when the value passes.
  module Validations
    # Passes when the string form of +input+ has at most +rule_value+
    # characters (+rule_value+ is converted with to_i).
    def length(input, rule_value)
      input.to_s.size <= rule_value.to_i
    end

    # Passes when the rule is switched off (+rule_value+ is falsy), or when
    # +input+ is not nil and contains something other than whitespace.
    def required(input, rule_value)
      return true unless rule_value

      !input.nil? && !input.to_s.strip.empty?
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require 'parse_me/parsed_object'
2
+
3
+ module ParseMe
4
+ class VarWidthParser
5
+ end
6
+ end
@@ -0,0 +1,3 @@
1
module ParseMe
  # Version of the gem. Frozen so the shared constant cannot be modified
  # in place.
  VERSION = "0.0.2".freeze
end
data/lib/parse_me.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "parse_me/version"
2
+ require "parse_me/fixed_width_parser"
3
+ require "parse_me/var_width_parser"
4
+ require "parse_me/parsed_object"
5
+ require "parse_me/transformations"
6
+
7
+ module ParseMe
8
+ end
data/parse_me.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
# Put the gem's own lib directory on the load path so the version file can be
# required straight from the working copy.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'parse_me/version'

Gem::Specification.new do |gem|
  gem.name          = "parse_me"
  gem.version       = ParseMe::VERSION
  gem.authors       = ["Serge Morales"]
  gem.email         = ["i.serge23@gmail.com"]
  gem.description   = "Plain text file parser"
  gem.summary       = "Parse your fixed or variable width plain text files using your own custom layouts, easily validate and retrieve your data."
  gem.homepage      = "https://github.com/sergelerator/parse_me"
  # The package ships an MIT LICENSE.txt; declare it in the metadata as well.
  gem.license       = "MIT"

  # The packaged files are whatever git tracks, one path per line of output.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end