bismas 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +10 -0
- data/README +1 -1
- data/bin/bismas-chardiff +30 -0
- data/bin/bismas-filter +30 -0
- data/bin/{bismas → bismas2xml} +1 -1
- data/lib/bismas.rb +2 -2
- data/lib/bismas/cli.rb +13 -98
- data/lib/bismas/cli/chardiff.rb +116 -0
- data/lib/bismas/cli/filter.rb +131 -0
- data/lib/bismas/cli/xml.rb +97 -0
- data/lib/bismas/{categories.rb → schema.rb} +31 -17
- data/lib/bismas/version.rb +1 -1
- data/lib/bismas/writer.rb +4 -2
- data/spec/bismas/reader_spec.rb +15 -3
- data/spec/bismas/schema_spec.rb +56 -0
- data/spec/data/test.cat +64 -0
- data/spec/spec_helper.rb +1 -0
- metadata +24 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a7bfa4d757177bec894da5ec52a0e5c19aa9d9e
|
4
|
+
data.tar.gz: fd631175f071d9b1815e30d14c634878824eaeca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5afed0589b2e33679976c397e032610e25fb7bc9b997fb4ede71ea53c2d81162cfefe2a20d16bc3e9d8f423419c87eeb8d97fff628bfa937340ada17ab727c4c
|
7
|
+
data.tar.gz: 84e252197825b23ed413abbe78eaebf6675148db8ca1c7c64e9bcc29acbbc11082c1616a1067682a765f76ec362c86c39ddbe1ca29e4225d07517c6446265c27
|
data/ChangeLog
CHANGED
@@ -2,6 +2,16 @@
|
|
2
2
|
|
3
3
|
= Revision history for bismas
|
4
4
|
|
5
|
+
== 0.3.0 [2015-12-11]
|
6
|
+
|
7
|
+
* Added +bismas2xml+ executable.
|
8
|
+
* Added +bismas-chardiff+ executable.
|
9
|
+
* Renamed +bismas+ executable to +bismas-filter+.
|
10
|
+
* Renamed Bismas::Categories to Bismas::Schema and extended scope.
|
11
|
+
* Fixed Bismas.amend_encoding to use default encoding when encoding option is
|
12
|
+
+nil+.
|
13
|
+
* Changed Bismas::Writer to insert key field at beginning of record.
|
14
|
+
|
5
15
|
== 0.2.0 [2015-12-03]
|
6
16
|
|
7
17
|
* Added <tt>.CAT</tt> parser.
|
data/README
CHANGED
data/bin/bismas-chardiff
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# bismas -- A Ruby client for BISMAS databases #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2015 Jens Wille #
|
9
|
+
# #
|
10
|
+
# Authors: #
|
11
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
|
+
# #
|
13
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
14
|
+
# under the terms of the GNU Affero General Public License as published by #
|
15
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
16
|
+
# option) any later version. #
|
17
|
+
# #
|
18
|
+
# bismas is distributed in the hope that it will be useful, but #
|
19
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
20
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
21
|
+
# License for more details. #
|
22
|
+
# #
|
23
|
+
# You should have received a copy of the GNU Affero General Public License #
|
24
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
+
# #
|
26
|
+
###############################################################################
|
27
|
+
#++
|
28
|
+
|
29
|
+
require 'bismas/cli'
|
30
|
+
Bismas::CLI::Chardiff.execute
|
data/bin/bismas-filter
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# bismas -- A Ruby client for BISMAS databases #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2015 Jens Wille #
|
9
|
+
# #
|
10
|
+
# Authors: #
|
11
|
+
# Jens Wille <jens.wille@gmail.com> #
|
12
|
+
# #
|
13
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
14
|
+
# under the terms of the GNU Affero General Public License as published by #
|
15
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
16
|
+
# option) any later version. #
|
17
|
+
# #
|
18
|
+
# bismas is distributed in the hope that it will be useful, but #
|
19
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
20
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
21
|
+
# License for more details. #
|
22
|
+
# #
|
23
|
+
# You should have received a copy of the GNU Affero General Public License #
|
24
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
25
|
+
# #
|
26
|
+
###############################################################################
|
27
|
+
#++
|
28
|
+
|
29
|
+
require 'bismas/cli'
|
30
|
+
Bismas::CLI::Filter.execute
|
data/bin/{bismas → bismas2xml}
RENAMED
data/lib/bismas.rb
CHANGED
@@ -131,7 +131,7 @@ module Bismas
|
|
131
131
|
end
|
132
132
|
|
133
133
|
def amend_encoding(options, default_encoding = DEFAULT_ENCODING)
|
134
|
-
encoding = options
|
134
|
+
encoding = (options[:encoding] || default_encoding).to_s
|
135
135
|
|
136
136
|
options[:encoding] = encoding.start_with?(':') ?
|
137
137
|
default_encoding.to_s + encoding : encoding
|
@@ -142,8 +142,8 @@ module Bismas
|
|
142
142
|
end
|
143
143
|
|
144
144
|
require_relative 'bismas/base'
|
145
|
+
require_relative 'bismas/schema'
|
145
146
|
require_relative 'bismas/reader'
|
146
147
|
require_relative 'bismas/writer'
|
147
148
|
require_relative 'bismas/mapping'
|
148
|
-
require_relative 'bismas/categories'
|
149
149
|
require_relative 'bismas/version'
|
data/lib/bismas/cli.rb
CHANGED
@@ -31,109 +31,24 @@ module Bismas
|
|
31
31
|
|
32
32
|
class CLI < Cyclops
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
config: 'config.yaml',
|
41
|
-
input: '-',
|
42
|
-
output: '-',
|
43
|
-
type: TYPES.first,
|
44
|
-
key_format: '%s'
|
45
|
-
)
|
46
|
-
end
|
47
|
-
|
34
|
+
def self.defaults
|
35
|
+
super.merge(
|
36
|
+
config: "#{name.split('::').last.downcase}.yaml",
|
37
|
+
input: '-',
|
38
|
+
output: '-'
|
39
|
+
)
|
48
40
|
end
|
49
41
|
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
klass = case type = options[:type]
|
54
|
-
when 'dat'
|
55
|
-
Writer
|
56
|
-
when 'dbm'
|
57
|
-
require 'midos'
|
58
|
-
options[:output_encoding] ||= Midos::DEFAULT_ENCODING
|
59
|
-
Midos::Writer
|
60
|
-
else
|
61
|
-
quit "Unsupported type: #{type}. Must be one of: #{TYPES.join(', ')}."
|
62
|
-
end
|
63
|
-
|
64
|
-
Bismas.filter(klass, options, &method(:quit))
|
42
|
+
def require_gem(gem, lib = gem)
|
43
|
+
require lib
|
65
44
|
rescue LoadError => err
|
66
|
-
abort "Please install the `#{
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
|
71
|
-
def opts(opts)
|
72
|
-
opts.option(:input__FILE, 'Path to input file [Default: STDIN]')
|
73
|
-
|
74
|
-
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
75
|
-
|
76
|
-
opts.separator
|
77
|
-
opts.separator 'Writer options:'
|
78
|
-
|
79
|
-
opts.option(:type__TYPE, "Output file type (#{TYPES.join(', ')}) [Default: #{TYPES.first}]")
|
80
|
-
|
81
|
-
opts.separator
|
82
|
-
|
83
|
-
opts.option(:output_encoding__ENCODING, :n, 'Output encoding [Default: depends on TYPE]')
|
84
|
-
|
85
|
-
opts.separator
|
86
|
-
|
87
|
-
opts.option(:output_key__KEY, :k, 'ID key of output file')
|
88
|
-
opts.option(:key_format__KEY_FORMAT, :f, 'Key format [Default: %s]')
|
89
|
-
|
90
|
-
opts.separator
|
91
|
-
|
92
|
-
opts.option(:mapping__FILE_OR_YAML, 'Path to mapping file or YAML string')
|
93
|
-
|
94
|
-
opts.separator
|
95
|
-
|
96
|
-
opts.switch(:sort, 'Sort each record')
|
97
|
-
|
98
|
-
opts.separator
|
99
|
-
|
100
|
-
opts.option(:execute__CODE, 'Code to execute for each _record_ before mapping') { |e|
|
101
|
-
options[:execute] << e
|
102
|
-
}
|
103
|
-
|
104
|
-
opts.option(:execute_mapped__CODE, :E, 'Code to execute for each _record_ after mapping') { |e|
|
105
|
-
options[:execute_mapped] << e
|
106
|
-
}
|
107
|
-
|
108
|
-
opts.separator
|
109
|
-
|
110
|
-
opts.option(:padding_length__LENGTH, :P, Integer, "Length of padding for TYPE=dat [Default: #{DEFAULT_PADDING_LENGTH}]")
|
111
|
-
|
112
|
-
opts.separator
|
113
|
-
opts.separator 'Reader options:'
|
114
|
-
|
115
|
-
opts.option(:input_encoding__ENCODING, :N, "Input encoding [Default: #{DEFAULT_ENCODING}]")
|
116
|
-
|
117
|
-
opts.separator
|
118
|
-
|
119
|
-
opts.option(:input_key__KEY, :K, 'ID key of input file')
|
120
|
-
|
121
|
-
opts.separator
|
122
|
-
|
123
|
-
opts.switch(:strict, :S, 'Turn parse warnings into errors')
|
124
|
-
|
125
|
-
opts.switch(:silent, :T, 'Silence parse warnings')
|
126
|
-
|
127
|
-
opts.separator
|
128
|
-
|
129
|
-
opts.switch(:legacy, :L, 'Use the legacy parser')
|
130
|
-
|
131
|
-
opts.separator
|
132
|
-
opts.separator 'Common options:'
|
133
|
-
|
134
|
-
opts.option(:category_length__LENGTH, :C, Integer, "Length of category for TYPE=dat [Default: #{DEFAULT_CATEGORY_LENGTH}]")
|
45
|
+
abort "Please install the `#{gem}' gem. (#{err})"
|
135
46
|
end
|
136
47
|
|
137
48
|
end
|
138
49
|
|
139
50
|
end
|
51
|
+
|
52
|
+
require_relative 'cli/chardiff'
|
53
|
+
require_relative 'cli/filter'
|
54
|
+
require_relative 'cli/xml'
|
@@ -0,0 +1,116 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# bismas -- A Ruby client for BISMAS databases #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
12
|
+
# under the terms of the GNU Affero General Public License as published by #
|
13
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
14
|
+
# option) any later version. #
|
15
|
+
# #
|
16
|
+
# bismas is distributed in the hope that it will be useful, but #
|
17
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
18
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
19
|
+
# License for more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'csv'
|
28
|
+
|
29
|
+
module Bismas
|
30
|
+
|
31
|
+
class CLI
|
32
|
+
|
33
|
+
class Chardiff < self
|
34
|
+
|
35
|
+
def self.defaults
|
36
|
+
super.merge(
|
37
|
+
file1_encoding: DEFAULT_ENCODING,
|
38
|
+
file2_encoding: DEFAULT_ENCODING,
|
39
|
+
output_encoding: Encoding.default_external
|
40
|
+
)
|
41
|
+
end
|
42
|
+
|
43
|
+
def run(arguments)
|
44
|
+
quit unless arguments.empty?
|
45
|
+
|
46
|
+
output_encoding, frequencies = options[:output_encoding],
|
47
|
+
Hash.new { |h, k| h[k] = Hash.new { |i, j| i[j] = [0, 0] } }
|
48
|
+
|
49
|
+
2.times { |index|
|
50
|
+
file = "file#{index + 1}"
|
51
|
+
|
52
|
+
reader_options = {
|
53
|
+
encoding: "#{options[:"#{file}_encoding"]}:#{output_encoding}",
|
54
|
+
key: key = options[:"#{file}_key"]
|
55
|
+
}
|
56
|
+
|
57
|
+
Reader.parse_file(options[file.to_sym], reader_options) { |id, record|
|
58
|
+
frequency = Hash.new(0); record.delete(key)
|
59
|
+
|
60
|
+
record.each_value { |array| array.each { |value|
|
61
|
+
value.each_char { |char| frequency[char] += 1 } } }
|
62
|
+
|
63
|
+
frequencies.values_at(0, key ? id.to_i : $.).each { |hash|
|
64
|
+
frequency.each { |char, count| hash[char][index] += count } }
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
File.open_file(options[:output], {}, 'w') { |io|
|
69
|
+
begin
|
70
|
+
csv = CSV.new(io) << %w[id char count1 count2 diff]
|
71
|
+
|
72
|
+
frequencies.sort_by { |id,| id }.each { |id, hash|
|
73
|
+
hash.sort_by { |char,| char }.each { |char, (count1, count2)|
|
74
|
+
unless count1 == count2
|
75
|
+
csv << [id, char, count1, count2, count2 - count1]
|
76
|
+
end
|
77
|
+
}
|
78
|
+
}
|
79
|
+
ensure
|
80
|
+
csv.close if csv
|
81
|
+
end
|
82
|
+
}
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
def opts(opts)
|
88
|
+
opts.option(:file1__FILE, :i, 'Path to input file 1 [Required]')
|
89
|
+
opts.option(:file2__FILE, :j, 'Path to input file 2 [Required]')
|
90
|
+
|
91
|
+
opts.separator
|
92
|
+
|
93
|
+
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
94
|
+
|
95
|
+
opts.separator
|
96
|
+
opts.separator 'Input options:'
|
97
|
+
|
98
|
+
opts.option(:file1_encoding__ENCODING, :N, "File 1 encoding [Default: #{DEFAULT_ENCODING}]")
|
99
|
+
opts.option(:file2_encoding__ENCODING, :O, "File 2 encoding [Default: #{DEFAULT_ENCODING}]")
|
100
|
+
|
101
|
+
opts.separator
|
102
|
+
|
103
|
+
opts.option(:file1_key__KEY, :K, 'ID key of file 1')
|
104
|
+
opts.option(:file2_key__KEY, :L, 'ID key of file 2')
|
105
|
+
|
106
|
+
opts.separator
|
107
|
+
opts.separator 'Output options:'
|
108
|
+
|
109
|
+
opts.option(:output_encoding__ENCODING, :n, "Output encoding [Default: #{defaults[:output_encoding]}]")
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# bismas -- A Ruby client for BISMAS databases #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
12
|
+
# under the terms of the GNU Affero General Public License as published by #
|
13
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
14
|
+
# option) any later version. #
|
15
|
+
# #
|
16
|
+
# bismas is distributed in the hope that it will be useful, but #
|
17
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
18
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
19
|
+
# License for more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
module Bismas
|
28
|
+
|
29
|
+
class CLI
|
30
|
+
|
31
|
+
class Filter < self
|
32
|
+
|
33
|
+
TYPES = %w[dat dbm]
|
34
|
+
|
35
|
+
def self.defaults
|
36
|
+
super.merge(
|
37
|
+
type: TYPES.first,
|
38
|
+
key_format: '%s'
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def run(arguments)
|
43
|
+
quit unless arguments.empty?
|
44
|
+
|
45
|
+
klass = case type = options[:type]
|
46
|
+
when 'dat'
|
47
|
+
Writer
|
48
|
+
when 'dbm'
|
49
|
+
require_gem 'midos'
|
50
|
+
options[:output_encoding] ||= Midos::DEFAULT_ENCODING
|
51
|
+
Midos::Writer
|
52
|
+
else
|
53
|
+
quit "Unsupported type: #{type}. Must be one of: #{TYPES.join(', ')}."
|
54
|
+
end
|
55
|
+
|
56
|
+
Bismas.filter(klass, options, &method(:quit))
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def opts(opts)
|
62
|
+
opts.option(:input__FILE, 'Path to input file [Default: STDIN]')
|
63
|
+
|
64
|
+
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
65
|
+
|
66
|
+
opts.separator
|
67
|
+
opts.separator 'Input options:'
|
68
|
+
|
69
|
+
opts.option(:input_encoding__ENCODING, :N, "Input encoding [Default: #{DEFAULT_ENCODING}]")
|
70
|
+
|
71
|
+
opts.separator
|
72
|
+
|
73
|
+
opts.option(:input_key__KEY, :K, 'ID key of input file')
|
74
|
+
|
75
|
+
opts.separator
|
76
|
+
|
77
|
+
opts.switch(:strict, :S, 'Turn parse warnings into errors')
|
78
|
+
|
79
|
+
opts.switch(:silent, :T, 'Silence parse warnings')
|
80
|
+
|
81
|
+
opts.separator
|
82
|
+
|
83
|
+
opts.switch(:legacy, :L, 'Use the legacy parser')
|
84
|
+
|
85
|
+
opts.separator
|
86
|
+
opts.separator 'Output options:'
|
87
|
+
|
88
|
+
opts.option(:type__TYPE, "Output file type (#{TYPES.join(', ')}) [Default: #{TYPES.first}]")
|
89
|
+
|
90
|
+
opts.separator
|
91
|
+
|
92
|
+
opts.option(:output_encoding__ENCODING, :n, 'Output encoding [Default: depends on TYPE]')
|
93
|
+
|
94
|
+
opts.separator
|
95
|
+
|
96
|
+
opts.option(:output_key__KEY, :k, 'ID key of output file')
|
97
|
+
opts.option(:key_format__KEY_FORMAT, :f, 'Key format [Default: %s]')
|
98
|
+
|
99
|
+
opts.separator
|
100
|
+
|
101
|
+
opts.option(:mapping__FILE_OR_YAML, 'Path to mapping file or YAML string')
|
102
|
+
|
103
|
+
opts.separator
|
104
|
+
|
105
|
+
opts.switch(:sort, 'Sort each record')
|
106
|
+
|
107
|
+
opts.separator
|
108
|
+
|
109
|
+
opts.option(:execute__CODE, 'Code to execute for each _record_ before mapping') { |e|
|
110
|
+
options[:execute] << e
|
111
|
+
}
|
112
|
+
|
113
|
+
opts.option(:execute_mapped__CODE, :E, 'Code to execute for each _record_ after mapping') { |e|
|
114
|
+
options[:execute_mapped] << e
|
115
|
+
}
|
116
|
+
|
117
|
+
opts.separator
|
118
|
+
|
119
|
+
opts.option(:padding_length__LENGTH, :P, Integer, "Length of padding for TYPE=dat [Default: #{DEFAULT_PADDING_LENGTH}]")
|
120
|
+
|
121
|
+
opts.separator
|
122
|
+
opts.separator 'Common options:'
|
123
|
+
|
124
|
+
opts.option(:category_length__LENGTH, :C, Integer, "Length of category for TYPE=dat [Default: #{DEFAULT_CATEGORY_LENGTH}]")
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# bismas -- A Ruby client for BISMAS databases #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# bismas is free software; you can redistribute it and/or modify it #
|
12
|
+
# under the terms of the GNU Affero General Public License as published by #
|
13
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
14
|
+
# option) any later version. #
|
15
|
+
# #
|
16
|
+
# bismas is distributed in the hope that it will be useful, but #
|
17
|
+
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
|
18
|
+
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public #
|
19
|
+
# License for more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with bismas. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
module Bismas
|
28
|
+
|
29
|
+
class CLI
|
30
|
+
|
31
|
+
class XML < self
|
32
|
+
|
33
|
+
def run(arguments)
|
34
|
+
require_gem 'builder'
|
35
|
+
|
36
|
+
quit unless arguments.empty?
|
37
|
+
|
38
|
+
quit 'Schema file is required' unless schema_file = options[:schema]
|
39
|
+
quit "No such file: #{schema_file}" unless File.readable?(schema_file)
|
40
|
+
|
41
|
+
schema = Schema.parse_file(schema_file)
|
42
|
+
|
43
|
+
reader_options = {
|
44
|
+
encoding: options[:encoding],
|
45
|
+
key: options[:key],
|
46
|
+
strict: options[:strict],
|
47
|
+
silent: options[:silent],
|
48
|
+
category_length: schema.category_length
|
49
|
+
}
|
50
|
+
|
51
|
+
File.open_file(options[:output], {}, 'wb') { |f|
|
52
|
+
xml = Builder::XmlMarkup.new(indent: 2, target: f)
|
53
|
+
xml.instruct!
|
54
|
+
|
55
|
+
xml.records(name: schema.name, description: schema.title) {
|
56
|
+
Reader.parse_file(options[:input], reader_options) { |id, record|
|
57
|
+
xml.record(id: id) {
|
58
|
+
record.sort_by { |key,| key }.each { |key, values|
|
59
|
+
values.each { |value|
|
60
|
+
xml.field(value, name: key, description: schema[key])
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
def opts(opts)
|
72
|
+
opts.option(:input__FILE, 'Path to input file [Default: STDIN]')
|
73
|
+
|
74
|
+
opts.option(:output__FILE, 'Path to output file [Default: STDOUT]')
|
75
|
+
|
76
|
+
opts.option(:schema__FILE, 'Path to schema file [Required]')
|
77
|
+
|
78
|
+
opts.separator
|
79
|
+
|
80
|
+
opts.option(:encoding__ENCODING, :N, "Input encoding [Default: #{DEFAULT_ENCODING}]")
|
81
|
+
|
82
|
+
opts.separator
|
83
|
+
|
84
|
+
opts.option(:key__KEY, :K, 'ID key of input file')
|
85
|
+
|
86
|
+
opts.separator
|
87
|
+
|
88
|
+
opts.switch(:strict, :S, 'Turn parse warnings into errors')
|
89
|
+
|
90
|
+
opts.switch(:silent, :T, 'Silence parse warnings')
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
@@ -26,32 +26,50 @@
|
|
26
26
|
|
27
27
|
module Bismas
|
28
28
|
|
29
|
-
class Categories
|
29
|
+
class Schema
|
30
30
|
|
31
31
|
FIELD_RE = %r{\Afeld\s+=\s+(\d+)}i
|
32
32
|
|
33
33
|
CATEGORY_RE = lambda { |category_length|
|
34
34
|
%r{\A(.{#{category_length}})"(.+?)"} }
|
35
35
|
|
36
|
-
|
37
|
-
Bismas.amend_encoding(options)
|
36
|
+
class << self
|
38
37
|
|
39
|
-
|
38
|
+
def parse(*args)
|
39
|
+
new.parse(*args)
|
40
|
+
end
|
40
41
|
|
41
|
-
|
42
|
+
def parse_file(file, options = {})
|
43
|
+
Bismas.amend_encoding(options)
|
44
|
+
File.open(file, 'rb', options, &method(:parse))
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
def initialize
|
50
|
+
@title, @name, @category_length, @categories = nil, nil, nil, {}
|
51
|
+
end
|
52
|
+
|
53
|
+
attr_accessor :title, :name, :category_length
|
54
|
+
|
55
|
+
def parse(io)
|
56
|
+
category_re = nil
|
57
|
+
|
58
|
+
io.each { |line|
|
42
59
|
case line
|
43
|
-
when FIELD_RE
|
44
|
-
category_re = CATEGORY_RE[$1.to_i]
|
45
60
|
when category_re
|
46
|
-
|
61
|
+
self[$1.strip] = $2.strip
|
62
|
+
when FIELD_RE
|
63
|
+
category_re = CATEGORY_RE[
|
64
|
+
self.category_length = $1.to_i]
|
65
|
+
when String
|
66
|
+
title ? name ? nil :
|
67
|
+
self.name = line.strip :
|
68
|
+
self.title = line.strip
|
47
69
|
end
|
48
70
|
}
|
49
71
|
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def initialize
|
54
|
-
@categories = {}
|
72
|
+
self
|
55
73
|
end
|
56
74
|
|
57
75
|
def categories
|
@@ -66,10 +84,6 @@ module Bismas
|
|
66
84
|
@categories[key] = value
|
67
85
|
end
|
68
86
|
|
69
|
-
def to_h
|
70
|
-
@categories
|
71
|
-
end
|
72
|
-
|
73
87
|
end
|
74
88
|
|
75
89
|
end
|
data/lib/bismas/version.rb
CHANGED
data/lib/bismas/writer.rb
CHANGED
@@ -85,9 +85,11 @@ module Bismas
|
|
85
85
|
def write_i(id, record, io = io())
|
86
86
|
return if record.empty?
|
87
87
|
|
88
|
-
|
88
|
+
if key && !record.key?(key)
|
89
|
+
record = { key => id || auto_id.call }.update(record)
|
90
|
+
end
|
89
91
|
|
90
|
-
|
92
|
+
category_format, fs = "%-#{@category_length}s", @chars[:fs]
|
91
93
|
|
92
94
|
io << @chars[:rs]
|
93
95
|
|
data/spec/bismas/reader_spec.rb
CHANGED
@@ -1,15 +1,27 @@
|
|
1
1
|
describe Bismas::Reader do
|
2
2
|
|
3
|
+
def parse_file(options = {})
|
4
|
+
described_class.parse_file(data('test.dat'), options)
|
5
|
+
end
|
6
|
+
|
7
|
+
example do
|
8
|
+
expect(parse_file.size).to eq(14)
|
9
|
+
end
|
10
|
+
|
11
|
+
example do
|
12
|
+
expect(parse_file(encoding: nil).size).to eq(14)
|
13
|
+
end
|
14
|
+
|
3
15
|
example do
|
4
|
-
expect(
|
16
|
+
expect(parse_file(encoding: Encoding::CP850).size).to eq(14)
|
5
17
|
end
|
6
18
|
|
7
19
|
example do
|
8
|
-
expect
|
20
|
+
expect{silence{parse_file(encoding: '')}}.to raise_error(TypeError, 'no implicit conversion of nil into String')
|
9
21
|
end
|
10
22
|
|
11
23
|
example do
|
12
|
-
expect(
|
24
|
+
expect(parse_file(strict: true)).to eq({
|
13
25
|
1 => { "005" => ["Bock, F."], "020" => ["Zur Geschichte des Schlagwortkatalogs in Praxis und Theorie"], "030" => ["Zentralblatt f\x81r Bibliothekswesen. 40(1923), S.494-502."], "055" => ["1923"], "060" => ["Geschichte des Schlagwortkataloges"], "059" => ["d"], "053" => ["a"], "120" => ["D"] },
|
14
26
|
2 => { "005" => ["Bravo, B.R. => Rodriguez Bravo, B."], "150" => ["22. 4.2007 19:43:53"] },
|
15
27
|
3 => { "005" => ["Simmons, P."], "020" => ["Microcomputer software for ISO 2709 record conversion"], "030" => ["Microcomputers for information management. 6(1989), S.197-205."], "055" => ["1989"], "060" => ["Bibliographische Software"], "053" => ["a"], "059" => ["e"], "100" => ["ISO 2709"], "065" => ["Datenformate"] },
|
@@ -0,0 +1,56 @@
|
|
1
|
+
describe Bismas::Schema do
|
2
|
+
|
3
|
+
subject { described_class.parse_file(data('test.cat')) }
|
4
|
+
|
5
|
+
example do
|
6
|
+
expect(subject.title).to eq(*encode("Literatur zur Inhaltserschlie\xE1ung"))
|
7
|
+
end
|
8
|
+
|
9
|
+
example do
|
10
|
+
expect(subject.name).to eq('LIT')
|
11
|
+
end
|
12
|
+
|
13
|
+
example do
|
14
|
+
expect(subject.category_length).to eq(4)
|
15
|
+
end
|
16
|
+
|
17
|
+
example do
|
18
|
+
expect(subject.categories).to eq(%w[
|
19
|
+
005 010 015 017 020 025 027 030 035 045 050 051 052 053 055 056 057 058
|
20
|
+
059 060 065 070 080 100 101 102 103 104 105 106 107 108 109 110 115 120
|
21
|
+
122 125 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
22
|
+
146 148 150 152
|
23
|
+
])
|
24
|
+
end
|
25
|
+
|
26
|
+
example do
|
27
|
+
expect(subject['000']).to be_nil
|
28
|
+
end
|
29
|
+
|
30
|
+
{
|
31
|
+
'005' => '1. Verfasser',
|
32
|
+
'015' => '3. Verfasser',
|
33
|
+
'020' => 'Hauptsachtitel',
|
34
|
+
'027' => 'Ausgabevermerk',
|
35
|
+
'035' => 'Serientitel',
|
36
|
+
'050' => 'Verlag',
|
37
|
+
'052' => 'ISBN',
|
38
|
+
'055' => 'Erscheinungsjahr',
|
39
|
+
'057' => 'Inhalt',
|
40
|
+
'059' => 'Sprache',
|
41
|
+
'065' => 'Themenbereich',
|
42
|
+
'080' => 'Behandelte Form',
|
43
|
+
'101' => 'Objekt',
|
44
|
+
'103' => 'Objekt',
|
45
|
+
'115' => 'Fachgebiet',
|
46
|
+
'120' => 'Land/Ort',
|
47
|
+
'122' => 'Sparte',
|
48
|
+
'130' => 'DDC',
|
49
|
+
'140' => 'Biographierte',
|
50
|
+
'145' => 'Bild',
|
51
|
+
'148' => 'Einstufung',
|
52
|
+
'150' => 'Datum1',
|
53
|
+
'152' => 'Datum2'
|
54
|
+
}.each { |k, v| example { expect(subject[k]).to eq(*encode(v)) } }
|
55
|
+
|
56
|
+
end
|
data/spec/data/test.cat
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
Literatur zur Inhaltserschließung
|
2
|
+
LIT
|
3
|
+
#PARAMETER#
|
4
|
+
Feld = 4
|
5
|
+
#INDEX#
|
6
|
+
#Kategorien#
|
7
|
+
005 " 1. Verfasser"!
|
8
|
+
010 " 2. Verfasser"
|
9
|
+
015 " 3. Verfasser" W
|
10
|
+
017 " Herausgeber" W
|
11
|
+
020 " Hauptsachtitel"!
|
12
|
+
025 " Zusatz HST"
|
13
|
+
027 " Ausgabevermerk"
|
14
|
+
030 " Quelle"!
|
15
|
+
035 " Serientitel"
|
16
|
+
045 " Verlagsort"
|
17
|
+
050 " Verlag"
|
18
|
+
051 " Umfang"
|
19
|
+
052 " ISBN" W
|
20
|
+
053 " Doktyp"! W
|
21
|
+
055 "Erscheinungsjahr"!
|
22
|
+
056 " Abstract"! W
|
23
|
+
057 " Inhalt" W
|
24
|
+
058 "         Fußnote"         W
|
25
|
+
059 " Sprache"! W
|
26
|
+
060 " Themenbereich"! N
|
27
|
+
065 " Themenbereich" N
|
28
|
+
070 " Themenbereich" N W
|
29
|
+
080 " Behandelte Form"!N W
|
30
|
+
100 " Objekt"!
|
31
|
+
101 " Objekt"
|
32
|
+
102 " Objekt"
|
33
|
+
103 " Objekt"
|
34
|
+
104 " Objekt"
|
35
|
+
105 " Objekt"
|
36
|
+
106 " Objekt"
|
37
|
+
107 " Objekt"
|
38
|
+
108 " Objekt"
|
39
|
+
109 " Objekt" W
|
40
|
+
110 " Hilfsmittel"! W
|
41
|
+
115 " Fachgebiet"! N W
|
42
|
+
120 " Land/Ort"! W
|
43
|
+
122 " Sparte" N W
|
44
|
+
125 " Signatur" W
|
45
|
+
130 " DDC" W
|
46
|
+
131 " LCC" W
|
47
|
+
132 " RVK" W
|
48
|
+
133 " BK" W
|
49
|
+
134 "Eppelsheimer SBB" W
|
50
|
+
135 " RSWK" W
|
51
|
+
136 " LCSH" W
|
52
|
+
137 " PRECIS" W
|
53
|
+
138 " COMPASS" W
|
54
|
+
139 " GHBS" W
|
55
|
+
140 " Biographierte" W
|
56
|
+
141 " ASB" W
|
57
|
+
142 " SSD" W
|
58
|
+
143 " SfB" W
|
59
|
+
144 " KAB" W
|
60
|
+
145 " Bild" B("" "d:\Irfan\i_view32.exe cat$") W
|
61
|
+
146 " Abbildung" N
|
62
|
+
148 " Einstufung" N W
|
63
|
+
150 " Datum1"! DC
|
64
|
+
152 " Datum2" DM
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bismas
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cyclops
|
@@ -89,7 +89,9 @@ dependencies:
|
|
89
89
|
description: Access BISMAS databases from Ruby.
|
90
90
|
email: jens.wille@gmail.com
|
91
91
|
executables:
|
92
|
-
- bismas
|
92
|
+
- bismas-chardiff
|
93
|
+
- bismas-filter
|
94
|
+
- bismas2xml
|
93
95
|
extensions: []
|
94
96
|
extra_rdoc_files:
|
95
97
|
- README
|
@@ -100,18 +102,25 @@ files:
|
|
100
102
|
- ChangeLog
|
101
103
|
- README
|
102
104
|
- Rakefile
|
103
|
-
- bin/bismas
|
105
|
+
- bin/bismas-chardiff
|
106
|
+
- bin/bismas-filter
|
107
|
+
- bin/bismas2xml
|
104
108
|
- lib/bismas.rb
|
105
109
|
- lib/bismas/base.rb
|
106
|
-
- lib/bismas/categories.rb
|
107
110
|
- lib/bismas/cli.rb
|
111
|
+
- lib/bismas/cli/chardiff.rb
|
112
|
+
- lib/bismas/cli/filter.rb
|
113
|
+
- lib/bismas/cli/xml.rb
|
108
114
|
- lib/bismas/mapping.rb
|
109
115
|
- lib/bismas/parser.rb
|
110
116
|
- lib/bismas/reader.rb
|
117
|
+
- lib/bismas/schema.rb
|
111
118
|
- lib/bismas/version.rb
|
112
119
|
- lib/bismas/writer.rb
|
113
120
|
- spec/bismas/parser_spec.rb
|
114
121
|
- spec/bismas/reader_spec.rb
|
122
|
+
- spec/bismas/schema_spec.rb
|
123
|
+
- spec/data/test.cat
|
115
124
|
- spec/data/test.dat
|
116
125
|
- spec/spec_helper.rb
|
117
126
|
homepage: http://github.com/blackwinter/bismas
|
@@ -120,14 +129,19 @@ licenses:
|
|
120
129
|
metadata: {}
|
121
130
|
post_install_message: |2+
|
122
131
|
|
123
|
-
bismas-0.2.0 [2015-12-03]:
|
132
|
+
bismas-0.3.0 [2015-12-11]:
|
124
133
|
|
125
|
-
* Added
|
126
|
-
*
|
134
|
+
* Added +bismas2xml+ executable.
|
135
|
+
* Added +bismas-chardiff+ executable.
|
136
|
+
* Renamed +bismas+ executable to +bismas-filter+.
|
137
|
+
* Renamed Bismas::Categories to Bismas::Schema and extended scope.
|
138
|
+
* Fixed Bismas.amend_encoding to use default encoding when encoding option is
|
139
|
+
+nil+.
|
140
|
+
* Changed Bismas::Writer to insert key field at beginning of record.
|
127
141
|
|
128
142
|
rdoc_options:
|
129
143
|
- "--title"
|
130
|
-
- bismas Application documentation (v0.2.0)
|
144
|
+
- bismas Application documentation (v0.3.0)
|
131
145
|
- "--charset"
|
132
146
|
- UTF-8
|
133
147
|
- "--line-numbers"
|
@@ -148,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
148
162
|
version: '0'
|
149
163
|
requirements: []
|
150
164
|
rubyforge_project:
|
151
|
-
rubygems_version: 2.5.
|
165
|
+
rubygems_version: 2.5.1
|
152
166
|
signing_key:
|
153
167
|
specification_version: 4
|
154
168
|
summary: A Ruby client for BISMAS databases.
|