athena 0.0.1.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ = Revision history for athena
2
+
3
+ == x.y.z [yyyy-mm-dd]
4
+
5
+ * ...
data/README ADDED
@@ -0,0 +1,33 @@
1
+ = athena - Convert database files to various formats
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to athena version 0.0.1
6
+
7
+
8
+ == DESCRIPTION
9
+
10
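+ athena is a library, accompanied by a command-line script, that converts (mainly) prometheus database files to various output formats.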
11
+
12
+
13
+ == AUTHORS
14
+
15
+ * Jens Wille <mailto:jens.wille@uni-koeln.de>
16
+
17
+
18
+ == LICENSE AND COPYRIGHT
19
+
20
+ Copyright (C) 2007 University of Cologne,
21
+ Albertus-Magnus-Platz, 50932 Cologne, Germany
22
+
23
+ athena is free software: you can redistribute it and/or modify it under the
24
+ terms of the GNU General Public License as published by the Free Software
25
+ Foundation, either version 3 of the License, or (at your option) any later
26
+ version.
27
+
28
+ athena is distributed in the hope that it will be useful, but WITHOUT ANY
29
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
31
+
32
+ You should have received a copy of the GNU General Public License along with
33
+ athena. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,23 @@
1
# Build configuration for the athena gem. It relies on rake's FileList and
# task DSL being in scope (presumably this is the project's Rakefile).

# Resolve the version file relative to this file instead of through the load
# path: Ruby 1.9.2+ no longer has the current directory in $LOAD_PATH, so a
# bare require 'lib/athena/version' fails there.
require File.expand_path('lib/athena/version', File.dirname(__FILE__))

# File sets that make up the package.
FILES = FileList['lib/**/*.rb'].to_a              # library sources
EXECS = FileList['bin/*'].to_a                    # executables (paths include 'bin/')
RDOCS = %w[README COPYING ChangeLog]              # extra documentation files
OTHER = FileList['[A-Z]*', 'example/*'].to_a      # capitalized top-level files and examples

# The block's return value is the documentation spec hash.
# NOTE(review): presumably consumed by shared task tooling that is not
# visible here -- confirm before changing the shape of this hash.
task(:doc_spec) {{
  :title      => 'athena Application documentation',
  :rdoc_files => RDOCS + FILES
}}

# The block's return value is the gem spec hash.
# NOTE(review): EXECS entries carry the 'bin/' prefix while :bindir is also
# 'bin' -- confirm the consumer of :executables expects prefixed paths.
task(:gem_spec) {{
  :name             => 'athena',
  :version          => Athena::VERSION,
  :summary          => 'Convert database files to various formats',
  :files            => FILES + EXECS + OTHER,
  :require_path     => 'lib',
  :bindir           => 'bin',
  :executables      => EXECS,
  :extra_rdoc_files => RDOCS,
  :dependencies     => %w[xmlstreamin ruby-nuggets]
}}
@@ -0,0 +1,165 @@
1
+ #! /usr/bin/ruby
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # athena -- Convert database files to various formats #
7
+ # #
8
+ # Copyright (C) 2007 University of Cologne, #
9
+ # Albertus-Magnus-Platz, #
10
+ # 50932 Cologne, Germany #
11
+ # #
12
+ # Authors: #
13
+ # Jens Wille <jens.wille@uni-koeln.de> #
14
+ # #
15
+ # athena is free software; you can redistribute it and/or modify it under the #
16
+ # terms of the GNU General Public License as published by the Free Software #
17
+ # Foundation; either version 3 of the License, or (at your option) any later #
18
+ # version. #
19
+ # #
20
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
21
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
22
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
23
+ # details. #
24
+ # #
25
+ # You should have received a copy of the GNU General Public License along #
26
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
27
+ # #
28
+ ###############################################################################
29
+ #++
30
+
31
+ require 'optparse'
32
+ require 'yaml'
33
+
34
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
35
+
36
+ require 'athena'
37
+
38
# Command-line front end for Athena: parses the options, works out the input
# format (spec), the output format and the config target, then converts the
# input record by record.
#
# Exit behaviour: informational options (-h, --version, -L, -l) print to
# STDOUT and exit successfully; every usage or validation problem goes
# through +abort+ (message on STDERR, non-zero exit status).

USAGE = "Usage: #{$0} [-h|--help] [options]"
abort USAGE if ARGV.empty?

# Global variable to handle verbosity
$_VERBOSE = {}

options = {
  :config => 'config.yaml',
  :input  => STDIN,
  :output => STDOUT,
  :target => nil
}

option_parser = OptionParser.new { |opts|
  opts.banner = USAGE

  opts.separator ''
  opts.separator 'Options:'

  opts.on('-c', '--config YAML', "Config file [Default: #{options[:config]}#{' (currently not present)' unless File.readable? options[:config]}]") { |f|
    abort "Can't find config file: #{f}." unless File.readable? f

    options[:config] = f
  }

  opts.separator ''

  opts.on('-i', '--input FILE', "Input file [Default: #{options[:input] == STDIN ? 'STDIN' : options[:input]}]") { |f|
    abort "Can't find input file: #{f}." unless File.readable? f

    options[:input] = File.open(f, 'r')

    # Only treat the last dot-separated part as the file ending when there
    # actually is one; a name without any dot is the target, not the spec.
    name_parts = File.basename(f).split('.')
    if name_parts.size > 1
      options[:spec_fallback]   = name_parts.last.downcase
      options[:target_fallback] = name_parts[0..-2].join('.')
    else
      options[:target_fallback] = name_parts.first
    end
  }

  opts.on('-s', '--spec SPEC', "Input format (spec) [Default: file ending of <input-file>]") { |s|
    options[:spec] = s.downcase
  }

  opts.on('-L', '--list-specs', "List available input formats (specs) and exit") {
    puts "Available input formats (specs):"
    Athena.input_formats.each { |s, k|
      puts "  - #{s} = #{k}"
    }

    exit 0
  }

  opts.separator ''

  opts.on('-o', '--output FILE', "Output file [Default: #{options[:output] == STDOUT ? 'STDOUT' : options[:output]}]") { |f|
    # Only remember the path here. The file is opened (and thereby
    # truncated) after all validation has passed, so an aborted run does
    # not destroy an existing output file.
    options[:output_file] = f

    # Infer the format from the base name only, and only if it has a file
    # ending, so dots in directory names cannot leak into the format.
    name_parts = File.basename(f).split('.')
    options[:format_fallback] = name_parts.last.downcase if name_parts.size > 1
  }

  opts.on('-f', '--format FORMAT', "Output format [Default: file ending of <output-file>]") { |f|
    options[:format] = f.downcase
  }

  opts.on('-l', '--list-formats', "List available output formats and exit") {
    puts "Available output formats:"
    Athena.output_formats.each { |f, k|
      puts "  - #{f} = #{k}"
    }

    exit 0
  }

  opts.separator ''

  opts.on('-t', '--target ID', "Target whose config to use [Default: <input-file> minus file ending,", "plus '.<spec>', plus ':<format>' (reversely in turn)]") { |t|
    options[:target] = t
  }

  opts.separator ''
  opts.separator 'Generic options:'

  opts.on('-v', '--verbose [WHAT]', "Be verbose about what's being done. Optional argument is a comma-separated", "list of what should be output, or 'all' [Default: 'all']") { |what|
    if what.nil? || what == 'all'
      $_VERBOSE.default = true
    else
      what.split(',').each { |w|
        $_VERBOSE[w.to_sym] = true
      }
    end
  }

  # Help and version are successful, informational requests: print to STDOUT
  # and exit with status 0, like the list options above.
  opts.on('-h', '--help', 'Print this help message and exit') {
    puts opts
    exit 0
  }

  opts.on('--version', 'Print program version and exit') {
    puts "#{File.basename($0)} v#{Athena::VERSION}"
    exit 0
  }
}

# Report unknown or malformed options as a usage error instead of letting
# the exception escape with a backtrace.
begin
  option_parser.parse!
rescue OptionParser::ParseError => err
  abort "#{err.message}\n#{USAGE}"
end

spec = options[:spec] || options[:spec_fallback]
abort "No input format (spec) specified and none could be inferred." unless spec
abort "Invalid input format (spec): #{spec}. Use '-L' to get a list of available specs." unless Athena.valid_input_format? spec

format = options[:format] || options[:format_fallback]
abort "No output format specified and none could be inferred." unless format
abort "Invalid output format: #{format}. Use '-l' to get a list of available formats." unless Athena.valid_output_format? format

# The '-c' handler only checks files given explicitly; the default config
# file needs the same check.
abort "Can't find config file: #{options[:config]}." unless File.readable? options[:config]

yaml = YAML.load_file(options[:config])
abort "Invalid config file: #{options[:config]}." unless yaml.is_a?(Hash)

if options[:target]
  target = options[:target]
  config = yaml[target.to_sym]
else
  base = options[:target_fallback]
  base = 'generic' if base.nil? || base.empty?

  # Try the most specific target first: '<base>.<spec>:<format>', then
  # '<base>.<spec>', then '<base>'. If none matches, the least specific one
  # is the target named in the error message.
  candidates = ["#{base}.#{spec}:#{format}", "#{base}.#{spec}", base]

  target = candidates.find { |candidate| yaml[candidate.to_sym] } || candidates.last
  config = yaml[target.to_sym]
end
abort "Config not found for target: #{target}." unless config

output = options[:output]
if options[:output_file]
  begin
    output = File.open(options[:output_file], 'w')
  rescue SystemCallError => err
    abort "Can't open output file: #{options[:output_file]} (#{err.message})."
  end
end

begin
  records = Athena.parser(config, spec).parse(options[:input]) { |record|
    output.puts record.to(format)
  }
ensure
  # Close whatever this script opened itself; leave the standard streams alone.
  output.close unless output == STDOUT
  options[:input].close unless options[:input] == STDIN
end

Athena::Util.verbose(:count) do
  spit records.size
end
@@ -0,0 +1,72 @@
1
+ :example:
2
+ :__record_element: "record"
3
+ :author: "author"
4
+ :title:
5
+ :elements:
6
+ - "title/main"
7
+ - "title/subtitle"
8
+ :string: "%s: %s"
9
+ :empty: ">>n/a<<"
10
+ :place:
11
+ :elements:
12
+ - "city"
13
+ - "country"
14
+ :separator: " / "
15
+ :multiple-fields-per-element:
16
+ :__record_element: "record"
17
+ :author:
18
+ :elements:
19
+ - "author"
20
+ - "city"
21
+ - "title/main"
22
+ :string: "author=%s (city=%s) [title/main=%s]"
23
+ :title:
24
+ :elements:
25
+ - "title/main"
26
+ - "title/subtitle"
27
+ - "author"
28
+ :string: "title/main=%s: title/subtitle=%s (author=%s)"
29
+ :empty: ">>n/a<<"
30
+ :place:
31
+ :elements:
32
+ - "city"
33
+ - "country"
34
+ - "title"
35
+ :string: "city=%s / country=%s (title=%s)"
36
+ :sisis-ex:
37
+ :__record_element: "0000" # KatalogNr
38
+ :author:
39
+ :elements:
40
+ - "0100" # VerfAnsetz
41
+ - "0101" # Verf_Ordn
42
+ :string: "%s (%s)"
43
+ :title:
44
+ :elements:
45
+ - "0331" # HST
46
+ - "0335" # HSTZusatz
47
+ - "0370" # Untertitstab
48
+ :string: "%s : %s [%s]"
49
+ :place: "2028" # ort2sb
50
+ :sisis-multiple-fields-per-element:
51
+ :__record_element: "0000"
52
+ :author:
53
+ :elements:
54
+ - "0100"
55
+ - "0101"
56
+ - "0331"
57
+ - "2028"
58
+ :string: "VerfAnsetz=%s (Verf_Ordn=%s) [HST=%s] / ort2sb=%s"
59
+ :title:
60
+ :elements:
61
+ - "0331"
62
+ - "0335"
63
+ - "0370"
64
+ - "0100"
65
+ :string: "HST=%s : HSTZusatz=%s [Untertitstab=%s] (VerfAnsetz=%s)"
66
+ :place:
67
+ :elements:
68
+ - "2028"
69
+ - "0335"
70
+ - "0370"
71
+ - "0100"
72
+ :string: "ort2sb=%s (HSTZusatz=%s [Untertitstab=%s] / VerfAnsetz=%s)"
@@ -0,0 +1,26 @@
1
+ <root>
2
+ <record>
3
+ <author>
4
+ <first>John Doe</first>
5
+ <second>John Q.</second>
6
+ <third>JJ</third>
7
+ </author>
8
+ <title>
9
+ <main>Just kiddin'</main>
10
+ <subtitle>heh?</subtitle>
11
+ </title>
12
+ <city>Nowhere</city>
13
+ <country>None</country>
14
+ </record>
15
+ <record>
16
+ <author>
17
+ Jane Doe
18
+ <separator />
19
+ JD
20
+ </author>
21
+ <title>
22
+ <main>No title</main>
23
+ </title>
24
+ <city>Nowhere</city>
25
+ </record>
26
+ </root>
@@ -0,0 +1,90 @@
1
+ 0000:3
2
+ 0001:000000003
3
+ 0002:02.05.2001
4
+ 0003:27.08.2002
5
+ 0015:ger; lat
6
+ 0036:m
7
+ 0100.001:[Pecka, Michael]
8
+ 0110.001:[Pieczek, Michael]
9
+ 0331:Denckwürdiges Geheimnuß
10
+ 0335:Teutsche Erklärung des Kupfferstücks
11
+ 0370.001:Dum tua privato fessus das lumina somno
12
+ 0370.002:Einer/ oder gar drey müssen für das Volck sterben
13
+ 0424:1620
14
+ 0425:[ca. 1620]
15
+ 0433:1 Bl.
16
+ 0434:1 Ill.; Radierung 19,5 x 27,5 cm(nach einem Kupferstich von Gaspar Dooms)
17
+ 0435:Satzspiegel 38 x 30 cm
18
+ 0440.001:[S.l.]
19
+ 0509:Verfasser ermittelt aus Nachweis
20
+ 0511:Erscheinungsjahr ermittelt aus Inhalt und Nachweis
21
+ 0527.001:Andere Ausgabe: Pfeffer, Maria
22
+ 0720.001:Politisches Flugblatt; Prokaiserliches Flugblatt
23
+ 0721.001:Dreissigjähriger Krieg; Böhmisch-Pfälzischer Krieg; Krieg; 1600-1650
24
+ 0722.001:Böhmen <Königreich>; Schlesien; Mähren; Lausitz; Bayern; Österreich; Prag
25
+ 0723.001:Ferdinand <Römisch-Deutsches Reich, Kaiser, II.>; Christus
26
+ 1105.001:Einbl. V,8 b-10
27
+ 1105.002:Einbl. V,60
28
+ 1125.001:Blattmaß 38 x 30 cm; Hinterklebt ; Klebereste
29
+ 1125.002:Blattmaß 30 x 31,5 cm; Fragment: unterer Textteil fehlt teilweise
30
+ 1145.001:ei
31
+ 1145.002:ei
32
+ 2005:Metallschnittrahmen; Metallschnittleiste als Spaltentrenner; Zahlen als Marginalien
33
+ 2006:Deutsche illustrierte Flugblätter des 16. und 17. Jahrhunderts . Hrsg. von Wolfgang Harms. Bd. 2
34
+ 2008:Denkwürdig Geheimnis Prophezeiung Zustand Böhmen Unwesen
35
+ 2018:Lateinischer Text in der Radierung als Beischriften; Im unteren Teil des Bildes Versform
36
+ 2023:¤ 61B2(Ferdinand <Römisch-Deutsches Reich, Kaiser, II.>)(+1)
37
+ 2028:BLA
38
+ 2029:001
39
+ 2030.001:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000183_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
40
+ 2030.002:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000368_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
41
+ 9999:
42
+ 0000:4
43
+ 0001:000000004
44
+ 0002:03.05.2001
45
+ 0003:29.08.2002
46
+ 0015:ger
47
+ 0036:m
48
+ 0331:Machometische Zanck= und Haderkatzen
49
+ 0370.001:UNser Planet Ist Machomet, Saturnisch Thier Sind wir allhier
50
+ 0424:1621
51
+ 0425:1621
52
+ 0433:1 Bl.
53
+ 0434:1 Ill.; Kupferstich 14 x 24 cm
54
+ 0435:Satzspiegel 30,5 x 24,5 cm
55
+ 0440.001:[S.l.]
56
+ 0720.001:Politische Flugblatt; Antiunionistisches Flugblatt
57
+ 0721.001:Dreissigjähriger Krieg; Böhmisch-Pfälzischer Krieg
58
+ 0723.001:Mu.hammad; Mohammed
59
+ 1105.001:Einbl. V,8 b-11
60
+ 1125.001:Blattmaß 32,5 x 26 cm; Hinterklebt
61
+ 1145.001:ei
62
+ 2005:Metallschnittleisten als Spaltentrenner; Textinitiale
63
+ 2006:Paas, John Roger: The German Political Broadsheet (1600 - 1700)
64
+ 2008:Mohammed; Zank; Hader; Katze
65
+ 2018:Versform; 4 Spalten
66
+ 2023:¤ 34B12 - Katze ¤ 25F(+51) - kämpfende Tiere; aggressive Beziehungen
67
+ 2029:002
68
+ 2030.001:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000184_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
69
+ 9999:
70
+ 0000:5
71
+ 0001:000000005
72
+ 0002:03.05.2001
73
+ 0003:29.08.2002
74
+ 0015:ger
75
+ 0036:m
76
+ 0331:Alles hatt Sein Zeydt
77
+ 0331.001:-- Hää?
78
+ 0424:1621
79
+ 0425:[1621]
80
+ 0433:1 Bl.
81
+ 0434:1 Ill.; 16 x 27,5 cm
82
+ 0435:Satspiegel 16 x 27,5 cm
83
+ 0440.001:[S.l.]
84
+ 0511:Erscheinungsjahr ermittelt aus Bildinhalt und Nachweis
85
+ 0511:Erscheinungsjahr ermittelt aus Bildinhalt und Nachweis
86
+ 0527.001:Andere Ausgabe: Einbl. V,8 b-8; Deutsche illustrierte Flugblätter des 16. und 17. Jahrhunderts
87
+ 2028:BLUB
88
+ 2029:003
89
+ 2030.001:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000185_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
90
+ 9999:
@@ -0,0 +1,68 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # athena -- Convert database files to various formats #
5
+ # #
6
+ # Copyright (C) 2007 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ # Athena is a library to convert (mainly) prometheus database files to various
30
+ # output formats. It's accompanied by a corresponding script that gives access
31
+ # to all its converting features.
32
+ #
33
+ # To support an additional input and/or output format, subclass
34
+ # Athena::Formats and supply an instance method _parse_ (for input) or a
35
+ # class method _convert_ (for output), respectively. This way, a single
36
+ # format can even serve as both input and output format.
37
+
38
+ require 'athena/util'
39
+ require 'athena/parser'
40
+ require 'athena/record'
41
+ require 'athena/formats'
42
+ require 'athena/version'
43
+
44
module Athena

  # Make every method below callable directly on the module
  # (e.g. Athena.parser).
  extend self

  # Creates a Parser for the given +config+ and input +format+.
  def parser(config, format)
    Parser.new(config, format)
  end

  # Returns the sorted contents of Formats.formats[:in].
  def input_formats
    sorted_formats(:in)
  end

  # Asks Formats whether +format+ is valid as an input format.
  def valid_input_format?(format)
    known_format?(:in, format)
  end

  # Returns the sorted contents of Formats.formats[:out].
  def output_formats
    sorted_formats(:out)
  end

  # Asks Formats whether +format+ is valid as an output format.
  def valid_output_format?(format)
    known_format?(:out, format)
  end

  private

  # Sorted formats registered for +direction+ (:in or :out).
  def sorted_formats(direction)
    Formats.formats[direction].sort
  end

  # Delegates the validity check for +direction+ (:in or :out) to Formats.
  def known_format?(direction, format)
    Formats.valid_format?(direction, format)
  end

end
+ end