dif 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 03c5b2d2fad359b79dda2614b3611f607306d5be
4
+ data.tar.gz: e20d6cd0eecdd5019f68bf80326cb48e17481540
5
+ SHA512:
6
+ metadata.gz: a51b0386f859202a8746c94e455c65ee2fae3666addd7710bb91a7c7f93e2968c0b653b98d2fefc78cc5dc227acbcc14da92011ad73ad2e64c5473adb6fc28ff
7
+ data.tar.gz: 48c1b5a1d21829509ff674f12c026a112f5411962e125ce6a2e6818656f1ac73fdd6772afa33f955e00cc127ecbb366e01c5165ed486f69fb20b94b356977415
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in dif.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Carlos Troncoso
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Dif
2
+
3
+ Dif provides a simple method to READ a DIF (Data Interchange Format) file
4
+ and convert it to tab-separated CSV text.
5
+ A WRITE module will follow in future versions if there is any interest,
6
+ but as of now, DIF being a legacy file format, the only use I've found is
7
+ to read from such files.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'dif'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install dif
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/dif.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'dif/version'
5
+
6
# Gem specification for dif: package metadata, the list of packaged files
# and the development-time dependencies.
Gem::Specification.new do |spec|
  spec.name          = "dif"
  spec.version       = Dif::VERSION
  spec.authors       = ["Carlos Troncoso"]
  spec.email         = ["ctroncoso@thinkmint.cl"]
  # Long description shown on the gem's registry page.
  spec.description   = %q{
  Dif provides a simple method to READ a dif (Data Interchange File Format),
  and convert to a standard CSV file.
  A WRITE module will follow in further versions, if there is any interest,
  but as of now, DIF being a legacy file format, the only use I've found is
  to read from them.}
  spec.summary       = %q{Provides a simple method to READ .dif files}
  # NOTE(review): homepage is intentionally left as-is (empty); fill in the
  # project URL once one exists.
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package exactly what git tracks; this requires building from a git checkout.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
data/lib/dif.rb ADDED
@@ -0,0 +1,103 @@
1
+ # File Header has the following structure
2
+ # TABLE
3
+ # 0,1
4
+ # ""
5
+
6
+ # The number of rows comes after "VECTORS"
7
+ # VECTORS
8
+ # 0,21 <--- Second value
9
+ # ""
10
+
11
+ # The number of columns comes after "TUPLES"
12
+ # TUPLES
13
+ # 0,26032 <--- Second value
14
+ # ""
15
+
16
+ # Data marker. This is a 3-line structure that signals the beginning of the DATA section
17
+ # DATA
18
+ # 0,0
19
+ # ""
20
+
21
+ # BOT: Beginning of Tuple. Actual data starts after this two-line structure.
22
+ # -1,0
23
+ # BOT
24
+
25
+ require "dif/version"
26
+ require "dif/line_helpers"
27
+ require "csv"
28
+
29
module Dif
  # Raised when the input does not follow the DIF layout this reader expects.
  # It subclasses RuntimeError so that code rescuing the RuntimeError raised
  # for an unknown command keeps working.
  class FormatError < RuntimeError; end

  # Reads a DIF file and converts its data section into tab-separated CSV text.
  #
  #   reader = Dif::Reader.new("report.dif")
  #   reader.export_csv   # => "name\tqty\n..."
  #
  # Public readers:
  #   rows_count   - the number found on the line after the VECTORS marker
  #   column_count - the number found on the line after the TUPLES marker
  #   lines        - the input lines still left after header removal
  #   csv          - the CSV writer that accumulates the converted rows
  #
  # NOTE(review): the DIF specification is commonly described as using VECTORS
  # for columns and TUPLES for rows, while some writers transpose them. The
  # mapping below is kept as it was; confirm it against the files you read.
  class Reader
    attr_reader :rows_count, :column_count, :lines, :csv

    # Keywords that may follow a "-1,0" directive.
    ALLOWED_COMMANDS = %w(EOD BOT).freeze

    # A whole line of the form "<type>,<number>", e.g. "-1,0", "0,15", "0,-2.5".
    # Anchored on purpose: cell text such as "1,5 kg" must not be mistaken for
    # a directive.
    DIRECTIVE = /\A\s*(?<command>-?\d+)\s*,\s*(?<value>[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*\z/

    # Numeric text that can be represented as an Integer without loss.
    INTEGER = /\A[-+]?\d+\z/

    # file     - path of the DIF file to read.
    # encoding - encoding of the file on disk; content is transcoded to UTF-8.
    #
    # Raises Dif::FormatError when a required marker is missing or an unknown
    # command follows a "-1,0" directive.
    def initialize(file, encoding="IBM850")
      @lines = ::File.read(file, :external_encoding => encoding, :internal_encoding => "UTF-8").lines
      # dup: the CSV writer appends to this string, so it must not be a literal.
      @csv = CSV.new("".dup, col_sep: "\t")
      @line_buffer = []
      fix_lines
      set_rows
      set_columns
      @lines.slice!(0..data_section_start_at_line)
      read_data
    end

    # Returns the converted data as one tab-separated CSV string.
    def export_csv
      @csv.string
    end

    private

    # Stores the number that follows the VECTORS marker.
    def set_rows
      @rows_count = header_value("VECTORS")
    end

    # Stores the number that follows the TUPLES marker.
    def set_columns
      @column_count = header_value("TUPLES")
    end

    # Returns the last comma-separated field, as an Integer, of the line that
    # follows the given header marker.
    def header_value(marker)
      position = lines.index(marker)
      raise FormatError, "#{marker} header not found" unless position

      numbers = lines[position.next]
      raise FormatError, "#{marker} header is truncated" unless numbers

      numbers.split(",").last.to_i
    end

    # Index of the DATA marker, memoized because the lines are sliced afterwards.
    def data_section_start_at_line
      @data_section_start_at_line ||= lines.index("DATA") ||
        raise(FormatError, "DATA section not found")
    end

    # Walks the data section and appends one CSV row per tuple.
    def read_data
      first_bot = lines.index("BOT")
      unless first_bot
        # No tuple at all: nothing to convert, leave the CSV empty.
        @lines.clear
        return
      end
      @lines.slice!(0..first_bot) # remove everything up to the first BOT

      payload_pending = false
      lines.each_with_index do |line, index|
        if payload_pending
          # This line is the text of the previous string directive; it has
          # already been consumed and must not be parsed as a directive.
          payload_pending = false
          next
        end

        directive = DIRECTIVE.match(line)
        next unless directive

        case directive[:command]
        when "-1" # command: BOT or EOD on the next line
          break if process_command(index) == "EOD"
        when "0" # numeric value
          @line_buffer << parse_number(directive[:value])
        when "1" # string value, stored on the next line
          @line_buffer << string_value(index)
          payload_pending = true
        end
      end
    end

    # Converts numeric text to an Integer when it is one, otherwise to a Float.
    def parse_number(text)
      text =~ INTEGER ? text.to_i : text.to_f
    end

    # Returns the line after +index+ with its surrounding double quotes removed.
    def string_value(index)
      payload = lines[index.next]
      raise FormatError, "String directive is missing its value line" unless payload

      payload.sub(/\A"/, "").sub(/"\z/, "")
    end

    # Handles the keyword following a "-1,0" directive: flushes the buffered
    # row to the CSV and returns the keyword ("BOT" or "EOD").
    def process_command(index)
      command = lines[index.next]
      raise FormatError, "Command not in allowed list" unless ALLOWED_COMMANDS.include?(command)

      csv << @line_buffer unless @line_buffer.empty?
      # CSV#<< serializes the row immediately, so the buffer can be reused.
      @line_buffer.clear
      command
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # These are private helper methods that extend the capabilities of Dif::Reader.
2
+
3
module Dif
  class Reader
    private

    # Strips the trailing line terminator from every entry of @lines, in place.
    #
    # Uses the non-bang chomp on purpose: chomp! returns nil when there is
    # nothing to remove, which would replace a final line that has no trailing
    # newline with nil.
    #
    # Returns the updated @lines array.
    def fix_lines
      @lines.map!(&:chomp)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Dif
  # Version of the gem; read by dif.gemspec. Frozen so it cannot be mutated
  # by accident at runtime.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dif
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Carlos Troncoso
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: "\n Dif provides a simple method to READ a dif (Data Interchange File
42
+ Format),\n and convert to a standard CSV file.\n A WRITE module will follow
43
+ in further versions, if there is any interest, \n but as of now, being diff a
44
+ legacy file format, the only use I've foud is \n to read from them."
45
+ email:
46
+ - ctroncoso@thinkmint.cl
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - .gitignore
52
+ - Gemfile
53
+ - LICENSE.txt
54
+ - README.md
55
+ - Rakefile
56
+ - dif.gemspec
57
+ - lib/dif.rb
58
+ - lib/dif/line_helpers.rb
59
+ - lib/dif/version.rb
60
+ homepage: ''
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.1.4
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Provides a simple method to READ .dif files
84
+ test_files: []