bismas 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +10 -0
- data/README +1 -1
- data/bin/bismas-chardiff +30 -0
- data/bin/bismas-filter +30 -0
- data/bin/{bismas → bismas2xml} +1 -1
- data/lib/bismas.rb +2 -2
- data/lib/bismas/cli.rb +13 -98
- data/lib/bismas/cli/chardiff.rb +116 -0
- data/lib/bismas/cli/filter.rb +131 -0
- data/lib/bismas/cli/xml.rb +97 -0
- data/lib/bismas/{categories.rb → schema.rb} +31 -17
- data/lib/bismas/version.rb +1 -1
- data/lib/bismas/writer.rb +4 -2
- data/spec/bismas/reader_spec.rb +15 -3
- data/spec/bismas/schema_spec.rb +56 -0
- data/spec/data/test.cat +64 -0
- data/spec/spec_helper.rb +1 -0
- metadata +24 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a7bfa4d757177bec894da5ec52a0e5c19aa9d9e
|
4
|
+
data.tar.gz: fd631175f071d9b1815e30d14c634878824eaeca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5afed0589b2e33679976c397e032610e25fb7bc9b997fb4ede71ea53c2d81162cfefe2a20d16bc3e9d8f423419c87eeb8d97fff628bfa937340ada17ab727c4c
|
7
|
+
data.tar.gz: 84e252197825b23ed413abbe78eaebf6675148db8ca1c7c64e9bcc29acbbc11082c1616a1067682a765f76ec362c86c39ddbe1ca29e4225d07517c6446265c27
|
data/ChangeLog
CHANGED
@@ -2,6 +2,16 @@
|
|
2
2
|
|
3
3
|
= Revision history for bismas
|
4
4
|
|
5
|
+
== 0.3.0 [2015-12-11]
|
6
|
+
|
7
|
+
* Added +bismas2xml+ executable.
|
8
|
+
* Added +bismas-chardiff+ executable.
|
9
|
+
* Renamed +bismas+ executable to +bismas-filter+.
|
10
|
+
* Renamed Bismas::Categories to Bismas::Schema and extended scope.
|
11
|
+
* Fixed Bismas.amend_encoding to use default encoding when encoding option is
|
12
|
+
+nil+.
|
13
|
+
* Changed Bismas::Writer to insert key field at beginning of record.
|
14
|
+
|
5
15
|
== 0.2.0 [2015-12-03]
|
6
16
|
|
7
17
|
* Added <tt>.CAT</tt> parser.
|
data/README
CHANGED
data/bin/bismas-chardiff
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# bismas -- A Ruby client for BISMAS databases #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2015 Jens Wille #
|
9
|
+
# #
|
10
|
+
# Authors: #
|
11
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
|
+
# #
|
13
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
14
|
+
# under the terms of the GNU Affero General Public License as published by #
|
15
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
16
|
+
# option) any later version. #
|
17
|
+
# #
|
18
|
+
# bismas is distributed in the hope that it will be useful, but #
|
19
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
20
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
21
|
+
# License for more details. #
|
22
|
+
# #
|
23
|
+
# You should have received a copy of the GNU Affero General Public License #
|
24
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
+
# #
|
26
|
+
###############################################################################
|
27
|
+
#++
|
28
|
+
|
29
|
+
require 'bismas/cli'
|
30
|
+
Bismas::CLI::Chardiff.execute
|
data/bin/bismas-filter
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# bismas -- A Ruby client for BISMAS databases #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2015 Jens Wille #
|
9
|
+
# #
|
10
|
+
# Authors: #
|
11
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
|
+
# #
|
13
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
14
|
+
# under the terms of the GNU Affero General Public License as published by #
|
15
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
16
|
+
# option) any later version. #
|
17
|
+
# #
|
18
|
+
# bismas is distributed in the hope that it will be useful, but #
|
19
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
20
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
21
|
+
# License for more details. #
|
22
|
+
# #
|
23
|
+
# You should have received a copy of the GNU Affero General Public License #
|
24
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
+
# #
|
26
|
+
###############################################################################
|
27
|
+
#++
|
28
|
+
|
29
|
+
require 'bismas/cli'
|
30
|
+
Bismas::CLI::Filter.execute
|
data/bin/{bismas → bismas2xml}
RENAMED
data/lib/bismas.rb
CHANGED
@@ -131,7 +131,7 @@ module Bismas
|
|
131
131
|
end
|
132
132
|
|
133
133
|
def amend_encoding(options, default_encoding = DEFAULT_ENCODING)
|
134
|
-
encoding = options
|
134
|
+
encoding = (options[:encoding] || default_encoding).to_s
|
135
135
|
|
136
136
|
options[:encoding] = encoding.start_with?(':') ?
|
137
137
|
default_encoding.to_s + encoding : encoding
|
@@ -142,8 +142,8 @@ module Bismas
|
|
142
142
|
end
|
143
143
|
|
144
144
|
require_relative 'bismas/base'
|
145
|
+
require_relative 'bismas/schema'
|
145
146
|
require_relative 'bismas/reader'
|
146
147
|
require_relative 'bismas/writer'
|
147
148
|
require_relative 'bismas/mapping'
|
148
|
-
require_relative 'bismas/categories'
|
149
149
|
require_relative 'bismas/version'
|
data/lib/bismas/cli.rb
CHANGED
@@ -31,109 +31,24 @@ module Bismas
|
|
31
31
|
|
32
32
|
class CLI < Cyclops
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
config: 'config.yaml',
|
41
|
-
input: '-',
|
42
|
-
output: '-',
|
43
|
-
type: TYPES.first,
|
44
|
-
key_format: '%s'
|
45
|
-
)
|
46
|
-
end
|
47
|
-
|
34
|
+
def self.defaults
|
35
|
+
super.merge(
|
36
|
+
config: "#{name.split('::').last.downcase}.yaml",
|
37
|
+
input: '-',
|
38
|
+
output: '-'
|
39
|
+
)
|
48
40
|
end
|
49
41
|
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
klass = case type = options[:type]
|
54
|
-
when 'dat'
|
55
|
-
Writer
|
56
|
-
when 'dbm'
|
57
|
-
require 'midos'
|
58
|
-
options[:output_encoding] ||= Midos::DEFAULT_ENCODING
|
59
|
-
Midos::Writer
|
60
|
-
else
|
61
|
-
quit "Unsupported type: #{type}. Must be one of: #{TYPES.join(', ')}."
|
62
|
-
end
|
63
|
-
|
64
|
-
Bismas.filter(klass, options, &method(:quit))
|
42
|
+
def require_gem(gem, lib = gem)
|
43
|
+
require lib
|
65
44
|
rescue LoadError => err
|
66
|
-
abort "Please install the `#{
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
|
71
|
-
def opts(opts)
|
72
|
-
opts.option(:input__FILE, 'Path to input file [Default: STDIN]')
|
73
|
-
|
74
|
-
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
75
|
-
|
76
|
-
opts.separator
|
77
|
-
opts.separator 'Writer options:'
|
78
|
-
|
79
|
-
opts.option(:type__TYPE, "Output file type (#{TYPES.join(', ')}) [Default: #{TYPES.first}]")
|
80
|
-
|
81
|
-
opts.separator
|
82
|
-
|
83
|
-
opts.option(:output_encoding__ENCODING, :n, 'Output encoding [Default: depends on TYPE]')
|
84
|
-
|
85
|
-
opts.separator
|
86
|
-
|
87
|
-
opts.option(:output_key__KEY, :k, 'ID key of output file')
|
88
|
-
opts.option(:key_format__KEY_FORMAT, :f, 'Key format [Default: %s]')
|
89
|
-
|
90
|
-
opts.separator
|
91
|
-
|
92
|
-
opts.option(:mapping__FILE_OR_YAML, 'Path to mapping file or YAML string')
|
93
|
-
|
94
|
-
opts.separator
|
95
|
-
|
96
|
-
opts.switch(:sort, 'Sort each record')
|
97
|
-
|
98
|
-
opts.separator
|
99
|
-
|
100
|
-
opts.option(:execute__CODE, 'Code to execute for each _record_ before mapping') { |e|
|
101
|
-
options[:execute] << e
|
102
|
-
}
|
103
|
-
|
104
|
-
opts.option(:execute_mapped__CODE, :E, 'Code to execute for each _record_ after mapping') { |e|
|
105
|
-
options[:execute_mapped] << e
|
106
|
-
}
|
107
|
-
|
108
|
-
opts.separator
|
109
|
-
|
110
|
-
opts.option(:padding_length__LENGTH, :P, Integer, "Length of padding for TYPE=dat [Default: #{DEFAULT_PADDING_LENGTH}]")
|
111
|
-
|
112
|
-
opts.separator
|
113
|
-
opts.separator 'Reader options:'
|
114
|
-
|
115
|
-
opts.option(:input_encoding__ENCODING, :N, "Input encoding [Default: #{DEFAULT_ENCODING}]")
|
116
|
-
|
117
|
-
opts.separator
|
118
|
-
|
119
|
-
opts.option(:input_key__KEY, :K, 'ID key of input file')
|
120
|
-
|
121
|
-
opts.separator
|
122
|
-
|
123
|
-
opts.switch(:strict, :S, 'Turn parse warnings into errors')
|
124
|
-
|
125
|
-
opts.switch(:silent, :T, 'Silence parse warnings')
|
126
|
-
|
127
|
-
opts.separator
|
128
|
-
|
129
|
-
opts.switch(:legacy, :L, 'Use the legacy parser')
|
130
|
-
|
131
|
-
opts.separator
|
132
|
-
opts.separator 'Common options:'
|
133
|
-
|
134
|
-
opts.option(:category_length__LENGTH, :C, Integer, "Length of category for TYPE=dat [Default: #{DEFAULT_CATEGORY_LENGTH}]")
|
45
|
+
abort "Please install the `#{gem}' gem. (#{err})"
|
135
46
|
end
|
136
47
|
|
137
48
|
end
|
138
49
|
|
139
50
|
end
|
51
|
+
|
52
|
+
require_relative 'cli/chardiff'
|
53
|
+
require_relative 'cli/filter'
|
54
|
+
require_relative 'cli/xml'
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# bismas -- A Ruby client for BISMAS databases #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
12
|
+
# under the terms of the GNU Affero General Public License as published by #
|
13
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
14
|
+
# option) any later version. #
|
15
|
+
# #
|
16
|
+
# bismas is distributed in the hope that it will be useful, but #
|
17
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
18
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
19
|
+
# License for more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'csv'
|
28
|
+
|
29
|
+
module Bismas
|
30
|
+
|
31
|
+
class CLI
|
32
|
+
|
33
|
+
class Chardiff < self
|
34
|
+
|
35
|
+
def self.defaults
|
36
|
+
super.merge(
|
37
|
+
file1_encoding: DEFAULT_ENCODING,
|
38
|
+
file2_encoding: DEFAULT_ENCODING,
|
39
|
+
output_encoding: Encoding.default_external
|
40
|
+
)
|
41
|
+
end
|
42
|
+
|
43
|
+
def run(arguments)
|
44
|
+
quit unless arguments.empty?
|
45
|
+
|
46
|
+
output_encoding, frequencies = options[:output_encoding],
|
47
|
+
Hash.new { |h, k| h[k] = Hash.new { |i, j| i[j] = [0, 0] } }
|
48
|
+
|
49
|
+
2.times { |index|
|
50
|
+
file = "file#{index + 1}"
|
51
|
+
|
52
|
+
reader_options = {
|
53
|
+
encoding: "#{options[:"#{file}_encoding"]}:#{output_encoding}",
|
54
|
+
key: key = options[:"#{file}_key"]
|
55
|
+
}
|
56
|
+
|
57
|
+
Reader.parse_file(options[file.to_sym], reader_options) { |id, record|
|
58
|
+
frequency = Hash.new(0); record.delete(key)
|
59
|
+
|
60
|
+
record.each_value { |array| array.each { |value|
|
61
|
+
value.each_char { |char| frequency[char] += 1 } } }
|
62
|
+
|
63
|
+
frequencies.values_at(0, key ? id.to_i : $.).each { |hash|
|
64
|
+
frequency.each { |char, count| hash[char][index] += count } }
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
File.open_file(options[:output], {}, 'w') { |io|
|
69
|
+
begin
|
70
|
+
csv = CSV.new(io) << %w[id char count1 count2 diff]
|
71
|
+
|
72
|
+
frequencies.sort_by { |id,| id }.each { |id, hash|
|
73
|
+
hash.sort_by { |char,| char }.each { |char, (count1, count2)|
|
74
|
+
unless count1 == count2
|
75
|
+
csv << [id, char, count1, count2, count2 - count1]
|
76
|
+
end
|
77
|
+
}
|
78
|
+
}
|
79
|
+
ensure
|
80
|
+
csv.close if csv
|
81
|
+
end
|
82
|
+
}
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
def opts(opts)
|
88
|
+
opts.option(:file1__FILE, :i, 'Path to input file 1 [Required]')
|
89
|
+
opts.option(:file2__FILE, :j, 'Path to input file 2 [Required]')
|
90
|
+
|
91
|
+
opts.separator
|
92
|
+
|
93
|
+
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
94
|
+
|
95
|
+
opts.separator
|
96
|
+
opts.separator 'Input options:'
|
97
|
+
|
98
|
+
opts.option(:file1_encoding__ENCODING, :N, "File 1 encoding [Default: #{DEFAULT_ENCODING}]")
|
99
|
+
opts.option(:file2_encoding__ENCODING, :O, "File 2 encoding [Default: #{DEFAULT_ENCODING}]")
|
100
|
+
|
101
|
+
opts.separator
|
102
|
+
|
103
|
+
opts.option(:file1_key__KEY, :K, 'ID key of file 1')
|
104
|
+
opts.option(:file2_key__KEY, :L, 'ID key of file 2')
|
105
|
+
|
106
|
+
opts.separator
|
107
|
+
opts.separator 'Output options:'
|
108
|
+
|
109
|
+
opts.option(:output_encoding__ENCODING, :n, "Output encoding [Default: #{defaults[:output_encoding]}]")
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# bismas -- A Ruby client for BISMAS databases #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
12
|
+
# under the terms of the GNU Affero General Public License as published by #
|
13
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
14
|
+
# option) any later version. #
|
15
|
+
# #
|
16
|
+
# bismas is distributed in the hope that it will be useful, but #
|
17
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
18
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
19
|
+
# License for more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
module Bismas
|
28
|
+
|
29
|
+
class CLI
|
30
|
+
|
31
|
+
class Filter < self
|
32
|
+
|
33
|
+
TYPES = %w[dat dbm]
|
34
|
+
|
35
|
+
def self.defaults
|
36
|
+
super.merge(
|
37
|
+
type: TYPES.first,
|
38
|
+
key_format: '%s'
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def run(arguments)
|
43
|
+
quit unless arguments.empty?
|
44
|
+
|
45
|
+
klass = case type = options[:type]
|
46
|
+
when 'dat'
|
47
|
+
Writer
|
48
|
+
when 'dbm'
|
49
|
+
require_gem 'midos'
|
50
|
+
options[:output_encoding] ||= Midos::DEFAULT_ENCODING
|
51
|
+
Midos::Writer
|
52
|
+
else
|
53
|
+
quit "Unsupported type: #{type}. Must be one of: #{TYPES.join(', ')}."
|
54
|
+
end
|
55
|
+
|
56
|
+
Bismas.filter(klass, options, &method(:quit))
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def opts(opts)
|
62
|
+
opts.option(:input__FILE, 'Path to input file [Default: STDIN]')
|
63
|
+
|
64
|
+
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
65
|
+
|
66
|
+
opts.separator
|
67
|
+
opts.separator 'Input options:'
|
68
|
+
|
69
|
+
opts.option(:input_encoding__ENCODING, :N, "Input encoding [Default: #{DEFAULT_ENCODING}]")
|
70
|
+
|
71
|
+
opts.separator
|
72
|
+
|
73
|
+
opts.option(:input_key__KEY, :K, 'ID key of input file')
|
74
|
+
|
75
|
+
opts.separator
|
76
|
+
|
77
|
+
opts.switch(:strict, :S, 'Turn parse warnings into errors')
|
78
|
+
|
79
|
+
opts.switch(:silent, :T, 'Silence parse warnings')
|
80
|
+
|
81
|
+
opts.separator
|
82
|
+
|
83
|
+
opts.switch(:legacy, :L, 'Use the legacy parser')
|
84
|
+
|
85
|
+
opts.separator
|
86
|
+
opts.separator 'Output options:'
|
87
|
+
|
88
|
+
opts.option(:type__TYPE, "Output file type (#{TYPES.join(', ')}) [Default: #{TYPES.first}]")
|
89
|
+
|
90
|
+
opts.separator
|
91
|
+
|
92
|
+
opts.option(:output_encoding__ENCODING, :n, 'Output encoding [Default: depends on TYPE]')
|
93
|
+
|
94
|
+
opts.separator
|
95
|
+
|
96
|
+
opts.option(:output_key__KEY, :k, 'ID key of output file')
|
97
|
+
opts.option(:key_format__KEY_FORMAT, :f, 'Key format [Default: %s]')
|
98
|
+
|
99
|
+
opts.separator
|
100
|
+
|
101
|
+
opts.option(:mapping__FILE_OR_YAML, 'Path to mapping file or YAML string')
|
102
|
+
|
103
|
+
opts.separator
|
104
|
+
|
105
|
+
opts.switch(:sort, 'Sort each record')
|
106
|
+
|
107
|
+
opts.separator
|
108
|
+
|
109
|
+
opts.option(:execute__CODE, 'Code to execute for each _record_ before mapping') { |e|
|
110
|
+
options[:execute] << e
|
111
|
+
}
|
112
|
+
|
113
|
+
opts.option(:execute_mapped__CODE, :E, 'Code to execute for each _record_ after mapping') { |e|
|
114
|
+
options[:execute_mapped] << e
|
115
|
+
}
|
116
|
+
|
117
|
+
opts.separator
|
118
|
+
|
119
|
+
opts.option(:padding_length__LENGTH, :P, Integer, "Length of padding for TYPE=dat [Default: #{DEFAULT_PADDING_LENGTH}]")
|
120
|
+
|
121
|
+
opts.separator
|
122
|
+
opts.separator 'Common options:'
|
123
|
+
|
124
|
+
opts.option(:category_length__LENGTH, :C, Integer, "Length of category for TYPE=dat [Default: #{DEFAULT_CATEGORY_LENGTH}]")
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# bismas -- A Ruby client for BISMAS databases #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
12
|
+
# under the terms of the GNU Affero General Public License as published by #
|
13
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
14
|
+
# option) any later version. #
|
15
|
+
# #
|
16
|
+
# bismas is distributed in the hope that it will be useful, but #
|
17
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
18
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
19
|
+
# License for more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
module Bismas
|
28
|
+
|
29
|
+
class CLI
|
30
|
+
|
31
|
+
class XML < self
|
32
|
+
|
33
|
+
def run(arguments)
|
34
|
+
require_gem 'builder'
|
35
|
+
|
36
|
+
quit unless arguments.empty?
|
37
|
+
|
38
|
+
quit 'Schema file is required' unless schema_file = options[:schema]
|
39
|
+
quit "No such file: #{schema_file}" unless File.readable?(schema_file)
|
40
|
+
|
41
|
+
schema = Schema.parse_file(schema_file)
|
42
|
+
|
43
|
+
reader_options = {
|
44
|
+
encoding: options[:encoding],
|
45
|
+
key: options[:key],
|
46
|
+
strict: options[:strict],
|
47
|
+
silent: options[:silent],
|
48
|
+
category_length: schema.category_length
|
49
|
+
}
|
50
|
+
|
51
|
+
File.open_file(options[:output], {}, 'wb') { |f|
|
52
|
+
xml = Builder::XmlMarkup.new(indent: 2, target: f)
|
53
|
+
xml.instruct!
|
54
|
+
|
55
|
+
xml.records(name: schema.name, description: schema.title) {
|
56
|
+
Reader.parse_file(options[:input], reader_options) { |id, record|
|
57
|
+
xml.record(id: id) {
|
58
|
+
record.sort_by { |key,| key }.each { |key, values|
|
59
|
+
values.each { |value|
|
60
|
+
xml.field(value, name: key, description: schema[key])
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
def opts(opts)
|
72
|
+
opts.option(:input__FILE, 'Path to input file [Default: STDIN]')
|
73
|
+
|
74
|
+
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
75
|
+
|
76
|
+
opts.option(:schema__FILE, 'Path to schema file [Required]')
|
77
|
+
|
78
|
+
opts.separator
|
79
|
+
|
80
|
+
opts.option(:encoding__ENCODING, :N, "Input encoding [Default: #{DEFAULT_ENCODING}]")
|
81
|
+
|
82
|
+
opts.separator
|
83
|
+
|
84
|
+
opts.option(:key__KEY, :K, 'ID key of input file')
|
85
|
+
|
86
|
+
opts.separator
|
87
|
+
|
88
|
+
opts.switch(:strict, :S, 'Turn parse warnings into errors')
|
89
|
+
|
90
|
+
opts.switch(:silent, :T, 'Silence parse warnings')
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
@@ -26,32 +26,50 @@
|
|
26
26
|
|
27
27
|
module Bismas
|
28
28
|
|
29
|
-
class
|
29
|
+
class Schema
|
30
30
|
|
31
31
|
FIELD_RE = %r{\Afeld\s+=\s+(\d+)}i
|
32
32
|
|
33
33
|
CATEGORY_RE = lambda { |category_length|
|
34
34
|
%r{\A(.{#{category_length}})"(.+?)"} }
|
35
35
|
|
36
|
-
|
37
|
-
Bismas.amend_encoding(options)
|
36
|
+
class << self
|
38
37
|
|
39
|
-
|
38
|
+
def parse(*args)
|
39
|
+
new.parse(*args)
|
40
|
+
end
|
40
41
|
|
41
|
-
|
42
|
+
def parse_file(file, options = {})
|
43
|
+
Bismas.amend_encoding(options)
|
44
|
+
File.open(file, 'rb', options, &method(:parse))
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
def initialize
|
50
|
+
@title, @name, @category_length, @categories = nil, nil, nil, {}
|
51
|
+
end
|
52
|
+
|
53
|
+
attr_accessor :title, :name, :category_length
|
54
|
+
|
55
|
+
def parse(io)
|
56
|
+
category_re = nil
|
57
|
+
|
58
|
+
io.each { |line|
|
42
59
|
case line
|
43
|
-
when FIELD_RE
|
44
|
-
category_re = CATEGORY_RE[$1.to_i]
|
45
60
|
when category_re
|
46
|
-
|
61
|
+
self[$1.strip] = $2.strip
|
62
|
+
when FIELD_RE
|
63
|
+
category_re = CATEGORY_RE[
|
64
|
+
self.category_length = $1.to_i]
|
65
|
+
when String
|
66
|
+
title ? name ? nil :
|
67
|
+
self.name = line.strip :
|
68
|
+
self.title = line.strip
|
47
69
|
end
|
48
70
|
}
|
49
71
|
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def initialize
|
54
|
-
@categories = {}
|
72
|
+
self
|
55
73
|
end
|
56
74
|
|
57
75
|
def categories
|
@@ -66,10 +84,6 @@ module Bismas
|
|
66
84
|
@categories[key] = value
|
67
85
|
end
|
68
86
|
|
69
|
-
def to_h
|
70
|
-
@categories
|
71
|
-
end
|
72
|
-
|
73
87
|
end
|
74
88
|
|
75
89
|
end
|
data/lib/bismas/version.rb
CHANGED
data/lib/bismas/writer.rb
CHANGED
@@ -85,9 +85,11 @@ module Bismas
|
|
85
85
|
def write_i(id, record, io = io())
|
86
86
|
return if record.empty?
|
87
87
|
|
88
|
-
|
88
|
+
if key && !record.key?(key)
|
89
|
+
record = { key => id || auto_id.call }.update(record)
|
90
|
+
end
|
89
91
|
|
90
|
-
|
92
|
+
category_format, fs = "%-#{@category_length}s", @chars[:fs]
|
91
93
|
|
92
94
|
io << @chars[:rs]
|
93
95
|
|
data/spec/bismas/reader_spec.rb
CHANGED
@@ -1,15 +1,27 @@
|
|
1
1
|
describe Bismas::Reader do
|
2
2
|
|
3
|
+
def parse_file(options = {})
|
4
|
+
described_class.parse_file(data('test.dat'), options)
|
5
|
+
end
|
6
|
+
|
7
|
+
example do
|
8
|
+
expect(parse_file.size).to eq(14)
|
9
|
+
end
|
10
|
+
|
11
|
+
example do
|
12
|
+
expect(parse_file(encoding: nil).size).to eq(14)
|
13
|
+
end
|
14
|
+
|
3
15
|
example do
|
4
|
-
expect(
|
16
|
+
expect(parse_file(encoding: Encoding::CP850).size).to eq(14)
|
5
17
|
end
|
6
18
|
|
7
19
|
example do
|
8
|
-
expect
|
20
|
+
expect{silence{parse_file(encoding: '')}}.to raise_error(TypeError, 'no implicit conversion of nil into String')
|
9
21
|
end
|
10
22
|
|
11
23
|
example do
|
12
|
-
expect(
|
24
|
+
expect(parse_file(strict: true)).to eq({
|
13
25
|
1 => { "005" => ["Bock, F."], "020" => ["Zur Geschichte des Schlagwortkatalogs in Praxis und Theorie"], "030" => ["Zentralblatt f\x81r Bibliothekswesen. 40(1923), S.494-502."], "055" => ["1923"], "060" => ["Geschichte des Schlagwortkataloges"], "059" => ["d"], "053" => ["a"], "120" => ["D"] },
|
14
26
|
2 => { "005" => ["Bravo, B.R. => Rodriguez Bravo, B."], "150" => ["22. 4.2007 19:43:53"] },
|
15
27
|
3 => { "005" => ["Simmons, P."], "020" => ["Microcomputer software for ISO 2709 record conversion"], "030" => ["Microcomputers for information management. 6(1989), S.197-205."], "055" => ["1989"], "060" => ["Bibliographische Software"], "053" => ["a"], "059" => ["e"], "100" => ["ISO 2709"], "065" => ["Datenformate"] },
|
@@ -0,0 +1,56 @@
|
|
1
|
+
describe Bismas::Schema do
|
2
|
+
|
3
|
+
subject { described_class.parse_file(data('test.cat')) }
|
4
|
+
|
5
|
+
example do
|
6
|
+
expect(subject.title).to eq(*encode("Literatur zur Inhaltserschlie\xE1ung"))
|
7
|
+
end
|
8
|
+
|
9
|
+
example do
|
10
|
+
expect(subject.name).to eq('LIT')
|
11
|
+
end
|
12
|
+
|
13
|
+
example do
|
14
|
+
expect(subject.category_length).to eq(4)
|
15
|
+
end
|
16
|
+
|
17
|
+
example do
|
18
|
+
expect(subject.categories).to eq(%w[
|
19
|
+
005 010 015 017 020 025 027 030 035 045 050 051 052 053 055 056 057 058
|
20
|
+
059 060 065 070 080 100 101 102 103 104 105 106 107 108 109 110 115 120
|
21
|
+
122 125 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
22
|
+
146 148 150 152
|
23
|
+
])
|
24
|
+
end
|
25
|
+
|
26
|
+
example do
|
27
|
+
expect(subject['000']).to be_nil
|
28
|
+
end
|
29
|
+
|
30
|
+
{
|
31
|
+
'005' => '1. Verfasser',
|
32
|
+
'015' => '3. Verfasser',
|
33
|
+
'020' => 'Hauptsachtitel',
|
34
|
+
'027' => 'Ausgabevermerk',
|
35
|
+
'035' => 'Serientitel',
|
36
|
+
'050' => 'Verlag',
|
37
|
+
'052' => 'ISBN',
|
38
|
+
'055' => 'Erscheinungsjahr',
|
39
|
+
'057' => 'Inhalt',
|
40
|
+
'059' => 'Sprache',
|
41
|
+
'065' => 'Themenbereich',
|
42
|
+
'080' => 'Behandelte Form',
|
43
|
+
'101' => 'Objekt',
|
44
|
+
'103' => 'Objekt',
|
45
|
+
'115' => 'Fachgebiet',
|
46
|
+
'120' => 'Land/Ort',
|
47
|
+
'122' => 'Sparte',
|
48
|
+
'130' => 'DDC',
|
49
|
+
'140' => 'Biographierte',
|
50
|
+
'145' => 'Bild',
|
51
|
+
'148' => 'Einstufung',
|
52
|
+
'150' => 'Datum1',
|
53
|
+
'152' => 'Datum2'
|
54
|
+
}.each { |k, v| example { expect(subject[k]).to eq(*encode(v)) } }
|
55
|
+
|
56
|
+
end
|
data/spec/data/test.cat
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
Literatur zur Inhaltserschließung
|
2
|
+
LIT
|
3
|
+
#PARAMETER#
|
4
|
+
Feld = 4
|
5
|
+
#INDEX#
|
6
|
+
#Kategorien#
|
7
|
+
005 " 1. Verfasser"!
|
8
|
+
010 " 2. Verfasser"
|
9
|
+
015 " 3. Verfasser" W
|
10
|
+
017 " Herausgeber" W
|
11
|
+
020 " Hauptsachtitel"!
|
12
|
+
025 " Zusatz HST"
|
13
|
+
027 " Ausgabevermerk"
|
14
|
+
030 " Quelle"!
|
15
|
+
035 " Serientitel"
|
16
|
+
045 " Verlagsort"
|
17
|
+
050 " Verlag"
|
18
|
+
051 " Umfang"
|
19
|
+
052 " ISBN" W
|
20
|
+
053 " Doktyp"! W
|
21
|
+
055 "Erscheinungsjahr"!
|
22
|
+
056 " Abstract"! W
|
23
|
+
057 " Inhalt" W
|
24
|
+
058 " Fußnote" W
|
25
|
+
059 " Sprache"! W
|
26
|
+
060 " Themenbereich"! N
|
27
|
+
065 " Themenbereich" N
|
28
|
+
070 " Themenbereich" N W
|
29
|
+
080 " Behandelte Form"!N W
|
30
|
+
100 " Objekt"!
|
31
|
+
101 " Objekt"
|
32
|
+
102 " Objekt"
|
33
|
+
103 " Objekt"
|
34
|
+
104 " Objekt"
|
35
|
+
105 " Objekt"
|
36
|
+
106 " Objekt"
|
37
|
+
107 " Objekt"
|
38
|
+
108 " Objekt"
|
39
|
+
109 " Objekt" W
|
40
|
+
110 " Hilfsmittel"! W
|
41
|
+
115 " Fachgebiet"! N W
|
42
|
+
120 " Land/Ort"! W
|
43
|
+
122 " Sparte" N W
|
44
|
+
125 " Signatur" W
|
45
|
+
130 " DDC" W
|
46
|
+
131 " LCC" W
|
47
|
+
132 " RVK" W
|
48
|
+
133 " BK" W
|
49
|
+
134 "Eppelsheimer SBB" W
|
50
|
+
135 " RSWK" W
|
51
|
+
136 " LCSH" W
|
52
|
+
137 " PRECIS" W
|
53
|
+
138 " COMPASS" W
|
54
|
+
139 " GHBS" W
|
55
|
+
140 " Biographierte" W
|
56
|
+
141 " ASB" W
|
57
|
+
142 " SSD" W
|
58
|
+
143 " SfB" W
|
59
|
+
144 " KAB" W
|
60
|
+
145 " Bild" B("" "d:\Irfan\i_view32.exe cat$") W
|
61
|
+
146 " Abbildung" N
|
62
|
+
148 " Einstufung" N W
|
63
|
+
150 " Datum1"! DC
|
64
|
+
152 " Datum2" DM
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bismas
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cyclops
|
@@ -89,7 +89,9 @@ dependencies:
|
|
89
89
|
description: Access BISMAS databases from Ruby.
|
90
90
|
email: jens.wille@gmail.com
|
91
91
|
executables:
|
92
|
-
- bismas
|
92
|
+
- bismas-chardiff
|
93
|
+
- bismas-filter
|
94
|
+
- bismas2xml
|
93
95
|
extensions: []
|
94
96
|
extra_rdoc_files:
|
95
97
|
- README
|
@@ -100,18 +102,25 @@ files:
|
|
100
102
|
- ChangeLog
|
101
103
|
- README
|
102
104
|
- Rakefile
|
103
|
-
- bin/bismas
|
105
|
+
- bin/bismas-chardiff
|
106
|
+
- bin/bismas-filter
|
107
|
+
- bin/bismas2xml
|
104
108
|
- lib/bismas.rb
|
105
109
|
- lib/bismas/base.rb
|
106
|
-
- lib/bismas/categories.rb
|
107
110
|
- lib/bismas/cli.rb
|
111
|
+
- lib/bismas/cli/chardiff.rb
|
112
|
+
- lib/bismas/cli/filter.rb
|
113
|
+
- lib/bismas/cli/xml.rb
|
108
114
|
- lib/bismas/mapping.rb
|
109
115
|
- lib/bismas/parser.rb
|
110
116
|
- lib/bismas/reader.rb
|
117
|
+
- lib/bismas/schema.rb
|
111
118
|
- lib/bismas/version.rb
|
112
119
|
- lib/bismas/writer.rb
|
113
120
|
- spec/bismas/parser_spec.rb
|
114
121
|
- spec/bismas/reader_spec.rb
|
122
|
+
- spec/bismas/schema_spec.rb
|
123
|
+
- spec/data/test.cat
|
115
124
|
- spec/data/test.dat
|
116
125
|
- spec/spec_helper.rb
|
117
126
|
homepage: http://github.com/blackwinter/bismas
|
@@ -120,14 +129,19 @@ licenses:
|
|
120
129
|
metadata: {}
|
121
130
|
post_install_message: |2+
|
122
131
|
|
123
|
-
bismas-0.
|
132
|
+
bismas-0.3.0 [2015-12-11]:
|
124
133
|
|
125
|
-
* Added
|
126
|
-
*
|
134
|
+
* Added +bismas2xml+ executable.
|
135
|
+
* Added +bismas-chardiff+ executable.
|
136
|
+
* Renamed +bismas+ executable to +bismas-filter+.
|
137
|
+
* Renamed Bismas::Categories to Bismas::Schema and extended scope.
|
138
|
+
* Fixed Bismas.amend_encoding to use default encoding when encoding option is
|
139
|
+
+nil+.
|
140
|
+
* Changed Bismas::Writer to insert key field at beginning of record.
|
127
141
|
|
128
142
|
rdoc_options:
|
129
143
|
- "--title"
|
130
|
-
- bismas Application documentation (v0.
|
144
|
+
- bismas Application documentation (v0.3.0)
|
131
145
|
- "--charset"
|
132
146
|
- UTF-8
|
133
147
|
- "--line-numbers"
|
@@ -148,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
162
|
version: '0'
|
149
163
|
requirements: []
|
150
164
|
rubyforge_project:
|
151
|
-
rubygems_version: 2.5.
|
165
|
+
rubygems_version: 2.5.1
|
152
166
|
signing_key:
|
153
167
|
specification_version: 4
|
154
168
|
summary: A Ruby client for BISMAS databases.
|