parse_me 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rvmrc +48 -0
- data/.travis.yml +4 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +50 -0
- data/Rakefile +10 -0
- data/lib/parse_me/fixed_width_parser.rb +11 -0
- data/lib/parse_me/input_splitting/fixed/base.rb +51 -0
- data/lib/parse_me/input_splitting/fixed/labels.rb +53 -0
- data/lib/parse_me/input_splitting/fixed.rb +40 -0
- data/lib/parse_me/input_splitting/var.rb +0 -0
- data/lib/parse_me/input_splitting.rb +7 -0
- data/lib/parse_me/parsed_object.rb +67 -0
- data/lib/parse_me/transformations.rb +9 -0
- data/lib/parse_me/validations.rb +11 -0
- data/lib/parse_me/var_width_parser.rb +6 -0
- data/lib/parse_me/version.rb +3 -0
- data/lib/parse_me.rb +8 -0
- data/parse_me.gemspec +19 -0
- data/spec/fixed_width/input_splitting_spec.rb +389 -0
- data/spec/fixed_width/parser_spec.rb +35 -0
- data/spec/namespace_spec.rb +20 -0
- data/spec/parsed_object/parsed_object_spec.rb +165 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/transformations/basic_spec.rb +124 -0
- data/spec/validation/basic_spec.rb +76 -0
- data/spec/validation/date_spec.rb +1 -0
- data/spec/validation/numeric_spec.rb +1 -0
- metadata +90 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env bash

# This is an RVM Project .rvmrc file, used to automatically load the ruby
# development environment upon cd'ing into the directory

# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
# Only full ruby name is supported here, for short names use:
#     echo "rvm use 1.9.3" > .rvmrc
environment_id="ruby-1.9.3@parse_me"

# Uncomment the following lines if you want to verify rvm version per project
# rvmrc_rvm_version="1.18.18 (version)" # 1.10.1 seems as a safe start
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
#   echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
#   return 1
# }

# First we attempt to load the desired environment directly from the environment
# file. This is very fast and efficient compared to running through the entire
# CLI and selector. If you want feedback on which environment was used then
# insert the word 'use' after --create as this triggers verbose mode.
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
then
  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
  [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
    \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
else
  # If the environment file has not yet been created, use the RVM CLI to select.
  rvm --create "$environment_id" || {
    echo "Failed to create RVM environment '${environment_id}'."
    return 1
  }
fi

# If you use bundler, this might be useful to you:
# Install bundler when no `bundle` command is found, or when the one found
# lives under RVM's own bin directory.
if [[ -s Gemfile ]] && {
  ! builtin command -v bundle >/dev/null ||
  # The path is quoted and matched literally (-F) so that an rvm_path containing
  # spaces or regex metacharacters (e.g. the "." in ".rvm") is handled correctly.
  builtin command -v bundle | GREP_OPTIONS= \grep -F -- "$rvm_path/bin/bundle" >/dev/null
}
then
  printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
  gem install bundler
fi
if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
then
  # Hide the per-gem "Using ..." lines and the completion banner.
  bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
fi
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Serge Morales
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# ParseMe [Build Status](https://travis-ci.org/sergelerator/parse_me)
|
2
|
+
|
3
|
+
The `parse_me` gem aims to provide an easy-to-use plain text parser. It is currently in early development; expect to be able to parse fixed- and variable-width files with something similar to:
|
4
|
+
|
5
|
+
rules = {
|
6
|
+
name: { required: true, length: 50 },
|
7
|
+
last_name: { required: true, length: 50 },
|
8
|
+
age: { numeric: :integer },
|
9
|
+
email: { required: true, email: true},
|
10
|
+
last_seen: { date: "YYYY-MM-DD" }
|
11
|
+
}
|
12
|
+
|
13
|
+
options = {
|
14
|
+
row_delimiter: "\r\n", # Defaults to "\n"
|
15
|
+
field_delimiter: "|"
|
16
|
+
}
|
17
|
+
|
18
|
+
result = ParseMe::VarWidthParser.parse(source_string, rules, options)
|
19
|
+
result.each do |record|
|
20
|
+
record.valid? # => Runs the validations specified in rules, returns true if all pass
|
21
|
+
record.attributes # => Returns a Hash with the parsed attributes, using rules' keys
|
22
|
+
record.name # => Returns the name attribute
|
23
|
+
record.errors # => Returns a collection of the errors encountered during validation
|
24
|
+
end
|
25
|
+
|
26
|
+
## Installation
|
27
|
+
|
28
|
+
Add this line to your application's Gemfile:
|
29
|
+
|
30
|
+
gem 'parse_me'
|
31
|
+
|
32
|
+
And then execute:
|
33
|
+
|
34
|
+
$ bundle
|
35
|
+
|
36
|
+
Or install it yourself as:
|
37
|
+
|
38
|
+
$ gem install parse_me
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
|
42
|
+
Usage will be documented on the first Alpha release.
|
43
|
+
|
44
|
+
## Contributing
|
45
|
+
|
46
|
+
1. Fork it
|
47
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
48
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
49
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
50
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'parse_me/parsed_object'
|
2
|
+
require 'parse_me/input_splitting'
|
3
|
+
require 'parse_me/input_splitting/fixed'
|
4
|
+
require 'parse_me/input_splitting/var'
|
5
|
+
|
6
|
+
module ParseMe
  # Parser for fixed-width plain text input.
  #
  # All behaviour comes from InputSplitting::Fixed, which is mixed in twice:
  # once as instance methods (include) and once as class-level methods
  # (extend), so its methods are available on both the class and its instances.
  class FixedWidthParser
    include InputSplitting::Fixed
    extend InputSplitting::Fixed
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module ParseMe
  module InputSplitting
    module Fixed
      # Record-splitting helpers for fixed-width input.
      #
      # These methods read @rules, @source and @options from the object they are
      # mixed into, and call helpers that are not defined here: labeled_rows?,
      # valid_row_labels?, label_length, length_rules_for, label_for and
      # labeled_member.
      module Base
        # Boolean check: validates the presence of a length rule for every
        # attribute in @rules, and that the row labels are valid.
        #
        # Returns false when @rules has not been set.
        def valid_length_rules?
          return false unless @rules

          get_length_rules.all? && valid_row_labels?
        end

        # Maps the length rules to an array (memoized in @length_rules).
        # With labeled rows, the rules of every label are collected.
        def get_length_rules
          @length_rules ||= begin
            rule_sets = labeled_rows? ? @rules.values.flat_map(&:values) : @rules.values
            rule_sets.map { |rule| rule[:length] }
          end
        end

        # Returns the fixed width of a record: the sum of all attribute lengths
        # plus the label length. With labeled rows only the first label's rules
        # are summed.
        #
        # Raises ArgumentError when the length rules are invalid.
        def file_width
          unless valid_length_rules?
            raise ArgumentError, "File width can't be properly calculated if length rules are invalid"
          end

          @file_width ||= begin
            rule_sets = labeled_rows? ? @rules.first[1].values : @rules.values
            rule_sets.map { |rule| rule[:length] }.reduce(0, :+) + label_length
          end
        end

        # Creates the template string used to unpack a record, e.g. "A3A2".
        # With labeled rows the template starts with a field for the label.
        def unpack_pattern(label = nil)
          prefix = labeled_rows? ? "A#{label_length}" : ''
          prefix + length_rules_for(label).map { |length| "A#{length}" }.join
        end

        # Splits the input into records of file_width characters (memoized in
        # @split_input). A trailing chunk shorter than file_width is not returned.
        def split_input
          @split_input ||= begin
            # Honour the configured row delimiter (e.g. "\r\n"); stripping only
            # "\n" would leave the "\r" inside the data and misalign every record.
            delimiter = (@options && @options[:row_delimiter]) || "\n"
            # Stray "\n" characters are still removed, as they always were.
            @source.gsub(delimiter, '').delete("\n").scan(/.{#{file_width}}/)
          end
        end

        # Gets the list of attributes specified in @rules. The optional label
        # argument selects the rule set to read when labeled rows are enabled.
        def attributes(label = nil)
          (labeled_rows? ? @rules[label] : @rules).keys
        end

        # Decodes the supplied string and returns the attributes hash.
        def member(str)
          values = str.unpack(unpack_pattern)
          Hash[attributes.zip(values)]
        end

        # Maps every record of the input to a hash of attributes.
        def collection
          split_input.map do |str|
            labeled_rows? ? labeled_member(str, label_for(str)) : member(str)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module ParseMe
  module InputSplitting
    module Fixed
      # Helpers for inputs whose rows start with a label that selects the rule
      # set used to decode the rest of the row.
      #
      # These methods read @rules and @options from the object they are mixed
      # into; labeled_member also calls unpack_pattern and attributes, which are
      # not defined here.
      module Labels
        # Returns the value of the parse option `:labeled_rows` (nil when no
        # options have been set).
        #
        # The option is read on every call instead of being cached: a `||=` cache
        # never stores a false value and keeps returning a stale true value once
        # the options change.
        def labeled_rows?
          (@options || {})[:labeled_rows]
        end

        # Boolean validation check, returns true if all of the labels specified
        # in @rules are of the same length. Always true for unlabeled input.
        def valid_row_labels?
          return true unless labeled_rows?

          label_lengths.uniq.size <= 1
        end

        # Returns an array with the length rules in a given @rules configuration hash.
        # If the label does not exist (or if the `labeled_rows?` switch is turned off)
        # this method falls back to fetching the rules of the root @rules hash.
        def length_rules_for(label)
          (@rules[label] || @rules).values.map { |rule| rule[:length] }
        end

        # Gets an array with the labels specified in @rules, as strings sorted by
        # length (memoized in @labels).
        def labels
          @labels ||= @rules.keys.map(&:to_s).sort_by { |label| label.length }
        end

        # Returns an array with the lengths of the labels specified in @rules
        # (memoized in @label_lengths).
        def label_lengths
          @label_lengths ||= labels.map(&:length)
        end

        # Returns the length of the labels used in the file; 0 for unlabeled input.
        def label_length
          @label_length ||= labeled_rows? ? label_lengths.first : 0
        end

        # Returns the label for a given string: its first label_length characters.
        def label_for(str)
          str[0...label_length]
        end

        # Returns a Hash with the data from *str* formatted as an attribute list,
        # with the keys being attribute names, and the values being the attribute
        # values for that key. The label itself is stored under :__label.
        def labeled_member(str, label)
          # The first unpacked field is the label column; drop it.
          _label_field, *values = str.unpack(unpack_pattern(label))
          Hash[attributes(label).zip(values)].merge(__label: label)
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'parse_me/input_splitting/fixed/labels'
|
2
|
+
require 'parse_me/input_splitting/fixed/base'
|
3
|
+
|
4
|
+
module ParseMe
  module InputSplitting
    # Entry point for fixed-width parsing. Mixing this module in (via include or
    # extend) also mixes in the Base and Labels helper modules the same way.
    module Fixed
      # Hook: when Fixed is included, include Base and Labels as well.
      def self.included(base)
        base.instance_eval do
          include Base
          include Labels
        end
      end

      # Hook: when Fixed is extended, extend with Base and Labels as well.
      def self.extended(base)
        base.instance_eval do
          extend Base
          extend Labels
        end
      end

      # Parses +source+ according to +rules+ and returns an Array of ParsedObject,
      # one per record.
      #
      # source  - the String to parse.
      # rules   - the rules Hash; every attribute must declare a :length.
      # options - optional Hash; :labeled_rows defaults to false and
      #           :row_delimiter to "\n". The hash passed in is not modified.
      #
      # Raises ArgumentError when a length rule is missing.
      def parse(source, rules, options = {})
        # The helpers memoize derived values in instance variables. Clear them so
        # a second call on the same receiver (in particular a class that extends
        # this module) does not return data computed for a previous call.
        reset_parse_state

        # Work on a copy so the caller's hash is left untouched.
        @options = options.dup
        @options[:labeled_rows] ||= false
        @options[:row_delimiter] ||= "\n"

        @rules = rules
        @source = source

        unless valid_length_rules?
          raise ArgumentError, 'Fixed size file input requires a length rule for every attribute'
        end

        collection.map { |record| ParsedObject.new(record, @rules) }
      end

      private

      # Forgets every value memoized by the Base and Labels helpers.
      def reset_parse_state
        @length_rules = @file_width = @split_input = nil
        @labeled_rows = @labels = @label_lengths = @label_length = nil
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'parse_me/transformations'
|
2
|
+
require 'parse_me/validations'
|
3
|
+
|
4
|
+
module ParseMe
  # A single parsed record: its attribute values, the rules that apply to it and
  # the validation errors collected while it was built.
  #
  # Rule keys that match a public method of ParseMe::Validations are run as
  # validations; rule keys that match a public method of
  # ParseMe::Transformations are run as transformations.
  class ParsedObject
    include ParseMe::Validations
    include ParseMe::Transformations

    attr_reader :attributes, :record_label, :rules, :errors

    # attributes - Hash of attribute name => raw value. A :__label entry, when
    #              present and truthy, is removed and stored as the record label.
    # rules      - Hash of rules. For a labeled record the rule set stored under
    #              the record label is used.
    #
    # Validations run before transformations, so they see the values as passed in.
    def initialize(attributes, rules = {})
      @attributes = attributes
      @record_label = @attributes.delete(:__label) if @attributes[:__label]
      @rules = record_label ? rules[record_label] : rules
      @errors = []

      apply_validations
      apply_transformations
    end

    # Exposes every attribute as a reader method, e.g. `record.name`.
    #
    # The lookup uses key? rather than truthiness so that an attribute whose
    # value is nil or false is returned instead of raising NoMethodError.
    def method_missing(name, *args, &block)
      return @attributes[name] if @attributes.key?(name)

      super
    end

    # Keeps respond_to? and method consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      @attributes.key?(name) || super
    end

    # Names of the available transformations (memoized).
    def transformations
      @transformations ||= ParseMe::Transformations.public_instance_methods
    end

    # Names of the available validations (memoized).
    def validation_rules
      @validation_rules ||= ParseMe::Validations.public_instance_methods
    end

    # Runs, for every attribute, each transformation named in its rule set.
    def apply_transformations
      rules.each do |attr, rule_set|
        (rule_set.keys & transformations).each do |transformation|
          transform(attr, transformation, rule_set[transformation])
        end
      end
    end

    # Replaces the value of +attr+ with the result of calling the transformation
    # method with the current value and the rule's argument +val+.
    def transform(attr, transformation, val)
      @attributes[attr] = public_send(transformation, @attributes[attr], val)
    end

    # Runs, for every attribute, each validation named in its rule set.
    def apply_validations
      rules.each do |attr, rule_set|
        (rule_set.keys & validation_rules).each do |validation|
          validate(attr, validation, rule_set[validation])
        end
      end
    end

    # Calls the validation method with the value of +attr+ and the rule's
    # argument +val+; records an error message when it returns a falsy value.
    def validate(attr, validation, val)
      return if public_send(validation, @attributes[attr], val)

      add_error("#{validation} validation failed for '#{attr}' => '#{val}'")
    end

    # True when no validation error has been recorded.
    def valid?
      errors.empty?
    end

    # Appends +msg+ to the list of errors.
    def add_error(msg)
      errors << msg
    end
  end
end
|
data/lib/parse_me.rb
ADDED
data/parse_me.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'parse_me/version'
|
5
|
+
|
6
|
+
# Gem metadata for parse_me. The version comes from ParseMe::VERSION.
Gem::Specification.new do |gem|
  gem.name          = "parse_me"
  gem.version       = ParseMe::VERSION
  gem.authors       = ["Serge Morales"]
  gem.email         = ["i.serge23@gmail.com"]
  gem.description   = "Plain text file parser"
  gem.summary       = "Parse your fixed or variable width plain text files using your own custom layouts, easily validate and retrieve your data."
  gem.homepage      = "https://github.com/sergelerator/parse_me"
  # Declared to match the MIT LICENSE.txt shipped with the gem.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
|