athena 0.0.1.53

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ = Revision history for athena
2
+
3
+ == x.y.z [yyyy-mm-dd]
4
+
5
+ * ...
data/README ADDED
@@ -0,0 +1,33 @@
1
+ = athena - Convert database files to various formats
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to athena version 0.0.1
6
+
7
+
8
+ == DESCRIPTION
9
+
10
+ TODO: well, the description... ;-)
11
+
12
+
13
+ == AUTHORS
14
+
15
+ * Jens Wille <mailto:jens.wille@uni-koeln.de>
16
+
17
+
18
+ == LICENSE AND COPYRIGHT
19
+
20
+ Copyright (C) 2007 University of Cologne,
21
+ Albertus-Magnus-Platz, 50932 Cologne, Germany
22
+
23
+ athena is free software: you can redistribute it and/or modify it under the
24
+ terms of the GNU General Public License as published by the Free Software
25
+ Foundation, either version 3 of the License, or (at your option) any later
26
+ version.
27
+
28
+ athena is distributed in the hope that it will be useful, but WITHOUT ANY
29
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
31
+
32
+ You should have received a copy of the GNU General Public License along with
33
+ athena. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,23 @@
# Build configuration for the athena gem: declares the file sets and the
# documentation/gem specifications consumed by the build tasks.

# Load the version file relative to this Rakefile instead of relying on the
# current directory being part of the load path (it is not since Ruby 1.9.2).
require File.expand_path('lib/athena/version', File.dirname(__FILE__))

FILES = FileList['lib/**/*.rb'].to_a            # library sources
EXECS = FileList['bin/*'].to_a                  # executables
RDOCS = %w[README COPYING ChangeLog]            # extra documentation files
OTHER = FileList['[A-Z]*', 'example/*'].to_a    # top-level files and examples

# The block evaluates to the hash of documentation settings.
task(:doc_spec) {{
  :title      => 'athena Application documentation',
  :rdoc_files => RDOCS + FILES
}}

# The block evaluates to the hash of gem specification settings.
task(:gem_spec) {{
  :name             => 'athena',
  :version          => Athena::VERSION,
  :summary          => 'Convert database files to various formats',
  :files            => FILES + EXECS + OTHER,
  :require_path     => 'lib',
  :bindir           => 'bin',
  :executables      => EXECS,
  :extra_rdoc_files => RDOCS,
  :dependencies     => %w[xmlstreamin ruby-nuggets]
}}
@@ -0,0 +1,165 @@
1
+ #! /usr/bin/ruby
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # athena -- Convert database files to various formats #
7
+ # #
8
+ # Copyright (C) 2007 University of Cologne, #
9
+ # Albertus-Magnus-Platz, #
10
+ # 50932 Cologne, Germany #
11
+ # #
12
+ # Authors: #
13
+ # Jens Wille <jens.wille@uni-koeln.de> #
14
+ # #
15
+ # athena is free software; you can redistribute it and/or modify it under the #
16
+ # terms of the GNU General Public License as published by the Free Software #
17
+ # Foundation; either version 3 of the License, or (at your option) any later #
18
+ # version. #
19
+ # #
20
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
21
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
22
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
23
+ # details. #
24
+ # #
25
+ # You should have received a copy of the GNU General Public License along #
26
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
27
+ # #
28
+ ###############################################################################
29
+ #++
30
+
31
+ require 'optparse'
32
+ require 'yaml'
33
+
34
+ $: << File.join(File.dirname(__FILE__), '..', 'lib')
35
+
36
+ require 'athena'
37
+
USAGE = "Usage: #{$0} [-h|--help] [options]"
abort USAGE if ARGV.empty?

# Global variable to handle verbosity: maps a topic (Symbol) to true. A bare
# '-v' (or '-v all') sets the hash default to true instead, enabling any topic.
$_VERBOSE = {}

# Returns the lower-cased file ending of +path+ (without the leading dot), or
# nil if +path+ has none. Only the last path component is considered, so a
# dotted directory name or a file without ending is never mistaken for a format.
file_ending = lambda { |path|
  ending = File.extname(path).sub(/\A\./, '').downcase
  ending.empty? ? nil : ending
}

options = {
  :config => 'config.yaml',
  :input  => STDIN,
  :output => STDOUT,
  :target => nil
}

OptionParser.new { |opts|
  opts.banner = USAGE

  opts.separator ''
  opts.separator 'Options:'

  opts.on('-c', '--config YAML', "Config file [Default: #{options[:config]}#{' (currently not present)' unless File.readable? options[:config]}]") { |f|
    abort "Can't find config file: #{f}." unless File.readable? f

    options[:config] = f
  }

  opts.separator ''

  opts.on('-i', '--input FILE', "Input file [Default: #{options[:input] == STDIN ? 'STDIN' : options[:input]}]") { |f|
    abort "Can't find input file: #{f}." unless File.readable? f

    options[:input] = File.open(f, 'r')

    # Remember what can be inferred from the file name: its ending as the
    # input format and the remainder as the basis of the target id.
    options[:spec_fallback]   = file_ending.call(f)
    options[:target_fallback] = File.basename(f, File.extname(f))
  }

  opts.on('-s', '--spec SPEC', "Input format (spec) [Default: file ending of <input-file>]") { |s|
    options[:spec] = s.downcase
  }

  opts.on('-L', '--list-specs', "List available input formats (specs) and exit") {
    puts "Available input formats (specs):"
    Athena.input_formats.each { |s, k|
      puts "  - #{s} = #{k}"
    }

    exit 0
  }

  opts.separator ''

  opts.on('-o', '--output FILE', "Output file [Default: #{options[:output] == STDOUT ? 'STDOUT' : options[:output]}]") { |f|
    options[:output] = File.open(f, 'w')

    options[:format_fallback] = file_ending.call(f)
  }

  opts.on('-f', '--format FORMAT', "Output format [Default: file ending of <output-file>]") { |f|
    options[:format] = f.downcase
  }

  opts.on('-l', '--list-formats', "List available output formats and exit") {
    puts "Available output formats:"
    Athena.output_formats.each { |f, k|
      puts "  - #{f} = #{k}"
    }

    exit 0
  }

  opts.separator ''

  opts.on('-t', '--target ID', "Target whose config to use [Default: <input-file> minus file ending,", "plus '.<spec>', plus ':<format>' (reversely in turn)]") { |t|
    options[:target] = t
  }

  opts.separator ''
  opts.separator 'Generic options:'

  opts.on('-v', '--verbose [WHAT]', "Be verbose about what's being done. Optional argument is a comma-separated", "list of what should be output, or 'all' [Default: 'all']") { |what|
    if what.nil? || what == 'all'
      $_VERBOSE.default = true
    else
      what.split(',').each { |w|
        $_VERBOSE[w.to_sym] = true
      }
    end
  }

  # Explicitly requested help/version output is not an error: print it to
  # STDOUT and exit successfully (abort would use STDERR and exit status 1).
  opts.on('-h', '--help', 'Print this help message and exit') {
    puts opts
    exit 0
  }

  opts.on('--version', 'Print program version and exit') {
    puts "#{File.basename($0)} v#{Athena::VERSION}"
    exit 0
  }
}.parse!

# Explicitly given formats take precedence over those inferred from file names.
spec = options[:spec] || options[:spec_fallback]
abort "No input format (spec) specified and none could be inferred." unless spec
abort "Invalid input format (spec): #{spec}. Use '-L' to get a list of available specs." unless Athena.valid_input_format? spec

format = options[:format] || options[:format_fallback]
abort "No output format specified and none could be inferred." unless format
abort "Invalid output format: #{format}. Use '-l' to get a list of available formats." unless Athena.valid_output_format? format

# The default config file may be absent (the '-c' handler only checks files
# given explicitly); fail with the same message instead of a raw exception.
abort "Can't find config file: #{options[:config]}." unless File.readable? options[:config]

# An empty config file loads as a false value; treat it as "no targets".
yaml = YAML.load_file(options[:config]) || {}

# Declared up front so that the assignments inside the block below refer to
# these variables rather than to block-local ones.
target = config = nil

if options[:target]
  target = options[:target]
  config = yaml[target.to_sym]
else
  # Try the most specific target id first: '<base>.<spec>:<format>', then
  # '<base>.<spec>', then '<base>'. If none is configured, +target+ is left
  # at the last candidate tried for the error message.
  base = options[:target_fallback] || 'generic'

  [base, "#{base}.#{spec}", "#{base}.#{spec}:#{format}"].reverse.find { |candidate|
    target = candidate
    config = yaml[candidate.to_sym]
  }
end
abort "Config not found for target: #{target}." unless config

# Convert record by record, writing each one as soon as it has been parsed.
records = Athena.parser(config, spec).parse(options[:input]) { |record|
  options[:output].puts record.to(format)
}

# NOTE(review): +spit+ is not defined in this script; presumably it is made
# available inside the block by Athena::Util.verbose -- confirm.
Athena::Util.verbose(:count) do
  spit records.size
end
@@ -0,0 +1,72 @@
1
+ :example:
2
+ :__record_element: "record"
3
+ :author: "author"
4
+ :title:
5
+ :elements:
6
+ - "title/main"
7
+ - "title/subtitle"
8
+ :string: "%s: %s"
9
+ :empty: ">>n/a<<"
10
+ :place:
11
+ :elements:
12
+ - "city"
13
+ - "country"
14
+ :separator: " / "
15
+ :multiple-fields-per-element:
16
+ :__record_element: "record"
17
+ :author:
18
+ :elements:
19
+ - "author"
20
+ - "city"
21
+ - "title/main"
22
+ :string: "author=%s (city=%s) [title/main=%s]"
23
+ :title:
24
+ :elements:
25
+ - "title/main"
26
+ - "title/subtitle"
27
+ - "author"
28
+ :string: "title/main=%s: title/subtitle=%s (author=%s)"
29
+ :empty: ">>n/a<<"
30
+ :place:
31
+ :elements:
32
+ - "city"
33
+ - "country"
34
+ - "title"
35
+ :string: "city=%s / country=%s (title=%s)"
36
+ :sisis-ex:
37
+ :__record_element: "0000" # KatalogNr
38
+ :author:
39
+ :elements:
40
+ - "0100" # VerfAnsetz
41
+ - "0101" # Verf_Ordn
42
+ :string: "%s (%s)"
43
+ :title:
44
+ :elements:
45
+ - "0331" # HST
46
+ - "0335" # HSTZusatz
47
+ - "0370" # Untertitstab
48
+ :string: "%s : %s [%s]"
49
+ :place: "2028" # ort2sb
50
+ :sisis-multiple-fields-per-element:
51
+ :__record_element: "0000"
52
+ :author:
53
+ :elements:
54
+ - "0100"
55
+ - "0101"
56
+ - "0331"
57
+ - "2028"
58
+ :string: "VerfAnsetz=%s (Verf_Ordn=%s) [HST=%s] / ort2sb=%s"
59
+ :title:
60
+ :elements:
61
+ - "0331"
62
+ - "0335"
63
+ - "0370"
64
+ - "0100"
65
+ :string: "HST=%s : HSTZusatz=%s [Untertitstab=%s] (VerfAnsetz=%s)"
66
+ :place:
67
+ :elements:
68
+ - "2028"
69
+ - "0335"
70
+ - "0370"
71
+ - "0100"
72
+ :string: "ort2sb=%s (HSTZusatz=%s [Untertitstab=%s] / VerfAnsetz=%s)"
@@ -0,0 +1,26 @@
1
+ <root>
2
+ <record>
3
+ <author>
4
+ <first>John Doe</first>
5
+ <second>John Q.</second>
6
+ <third>JJ</third>
7
+ </author>
8
+ <title>
9
+ <main>Just kiddin'</main>
10
+ <subtitle>heh?</subtitle>
11
+ </title>
12
+ <city>Nowhere</city>
13
+ <country>None</country>
14
+ </record>
15
+ <record>
16
+ <author>
17
+ Jane Doe
18
+ <separator />
19
+ JD
20
+ </author>
21
+ <title>
22
+ <main>No title</main>
23
+ </title>
24
+ <city>Nowhere</city>
25
+ </record>
26
+ </root>
@@ -0,0 +1,90 @@
1
+ 0000:3
2
+ 0001:000000003
3
+ 0002:02.05.2001
4
+ 0003:27.08.2002
5
+ 0015:ger; lat
6
+ 0036:m
7
+ 0100.001:[Pecka, Michael]
8
+ 0110.001:[Pieczek, Michael]
9
+ 0331:Denckwürdiges Geheimnuß
10
+ 0335:Teutsche Erklärung des Kupfferstücks
11
+ 0370.001:Dum tua privato fessus das lumina somno
12
+ 0370.002:Einer/ oder gar drey müssen für das Volck sterben
13
+ 0424:1620
14
+ 0425:[ca. 1620]
15
+ 0433:1 Bl.
16
+ 0434:1 Ill.; Radierung 19,5 x 27,5 cm(nach einem Kupferstich von Gaspar Dooms)
17
+ 0435:Satzspiegel 38 x 30 cm
18
+ 0440.001:[S.l.]
19
+ 0509:Verfasser ermittelt aus Nachweis
20
+ 0511:Erscheinungsjahr ermittelt aus Inhalt und Nachweis
21
+ 0527.001:Andere Ausgabe: Pfeffer, Maria
22
+ 0720.001:Politisches Flugblatt; Prokaiserliches Flugblatt
23
+ 0721.001:Dreissigjähriger Krieg; Böhmisch-Pfälzischer Krieg; Krieg; 1600-1650
24
+ 0722.001:Böhmen <Königreich>; Schlesien; Mähren; Lausitz; Bayern; Österreich; Prag
25
+ 0723.001:Ferdinand <Römisch-Deutsches Reich, Kaiser, II.>; Christus
26
+ 1105.001:Einbl. V,8 b-10
27
+ 1105.002:Einbl. V,60
28
+ 1125.001:Blattmaß 38 x 30 cm; Hinterklebt ; Klebereste
29
+ 1125.002:Blattmaß 30 x 31,5 cm; Fragment: unterer Textteil fehlt teilweise
30
+ 1145.001:ei
31
+ 1145.002:ei
32
+ 2005:Metallschnittrahmen; Metallschnittleiste als Spaltentrenner; Zahlen als Marginalien
33
+ 2006:Deutsche illustrierte Flugblätter des 16. und 17. Jahrhunderts . Hrsg. von Wolfgang Harms. Bd. 2
34
+ 2008:Denkwürdig Geheimnis Prophezeiung Zustand Böhmen Unwesen
35
+ 2018:Lateinischer Text in der Radierung als Beischriften; Im unteren Teil des Bildes Versform
36
+ 2023:¤ 61B2(Ferdinand <Römisch-Deutsches Reich, Kaiser, II.>)(+1)
37
+ 2028:BLA
38
+ 2029:001
39
+ 2030.001:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000183_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
40
+ 2030.002:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000368_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
41
+ 9999:
42
+ 0000:4
43
+ 0001:000000004
44
+ 0002:03.05.2001
45
+ 0003:29.08.2002
46
+ 0015:ger
47
+ 0036:m
48
+ 0331:Machometische Zanck= und Haderkatzen
49
+ 0370.001:UNser Planet Ist Machomet, Saturnisch Thier Sind wir allhier
50
+ 0424:1621
51
+ 0425:1621
52
+ 0433:1 Bl.
53
+ 0434:1 Ill.; Kupferstich 14 x 24 cm
54
+ 0435:Satzspiegel 30,5 x 24,5 cm
55
+ 0440.001:[S.l.]
56
+ 0720.001:Politische Flugblatt; Antiunionistisches Flugblatt
57
+ 0721.001:Dreissigjähriger Krieg; Böhmisch-Pfälzischer Krieg
58
+ 0723.001:Mu.hammad; Mohammed
59
+ 1105.001:Einbl. V,8 b-11
60
+ 1125.001:Blattmaß 32,5 x 26 cm; Hinterklebt
61
+ 1145.001:ei
62
+ 2005:Metallschnittleisten als Spaltentrenner; Textinitiale
63
+ 2006:Paas, John Roger: The German Political Broadsheet (1600 - 1700)
64
+ 2008:Mohammed; Zank; Hader; Katze
65
+ 2018:Versform; 4 Spalten
66
+ 2023:¤ 34B12 - Katze ¤ 25F(+51) - kämpfende Tiere; aggressive Beziehungen
67
+ 2029:002
68
+ 2030.001:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000184_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
69
+ 9999:
70
+ 0000:5
71
+ 0001:000000005
72
+ 0002:03.05.2001
73
+ 0003:29.08.2002
74
+ 0015:ger
75
+ 0036:m
76
+ 0331:Alles hatt Sein Zeydt
77
+ 0331.001:-- Hää?
78
+ 0424:1621
79
+ 0425:[1621]
80
+ 0433:1 Bl.
81
+ 0434:1 Ill.; 16 x 27,5 cm
82
+ 0435:Satspiegel 16 x 27,5 cm
83
+ 0440.001:[S.l.]
84
+ 0511:Erscheinungsjahr ermittelt aus Bildinhalt und Nachweis
85
+ 0511:Erscheinungsjahr ermittelt aus Bildinhalt und Nachweis
86
+ 0527.001:Andere Ausgabe: Einbl. V,8 b-8; Deutsche illustrierte Flugblätter des 16. und 17. Jahrhunderts
87
+ 2028:BLUB
88
+ 2029:003
89
+ 2030.001:http://zoom.bib-bvb.de/StyleServer/calcrgn?cat=einbl&item=/300000185_0_r.sid&wid=750&hei=500&style=bsb/einbl.xsl&plugin=false
90
+ 9999:
@@ -0,0 +1,68 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # athena -- Convert database files to various formats #
5
+ # #
6
+ # Copyright (C) 2007 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # athena is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # athena is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with athena. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ # Athena is a library to convert (mainly) prometheus database files to various
30
+ # output formats. It's accompanied by a corresponding script that gives access
31
+ # to all its converting features.
32
+ #
33
+ # In order to support additional input and/or output formats, Athena::Formats
34
+ # needs to be sub-classed, supplying an instance method _parse_ (for input) or
35
+ # a class method _convert_ (for output), respectively. This way, a specific
36
+ # format can even function as both input and output format.
37
+
38
+ require 'athena/util'
39
+ require 'athena/parser'
40
+ require 'athena/record'
41
+ require 'athena/formats'
42
+ require 'athena/version'
43
+
# Namespace of the athena library. The module extends itself, so every method
# below can be called directly on it (e.g. <tt>Athena.parser</tt>).
module Athena

  extend self

  # Creates a new Parser from the given +config+ and input +format+.
  def parser(config, format)
    Parser.new(config, format)
  end

  # Returns <tt>Formats.formats[:in]</tt>, sorted.
  def input_formats
    incoming = Formats.formats[:in]
    incoming.sort
  end

  # Asks Formats whether +format+ is valid in the <tt>:in</tt> direction.
  def valid_input_format?(format)
    Formats.valid_format?(:in, format)
  end

  # Returns <tt>Formats.formats[:out]</tt>, sorted.
  def output_formats
    outgoing = Formats.formats[:out]
    outgoing.sort
  end

  # Asks Formats whether +format+ is valid in the <tt>:out</tt> direction.
  def valid_output_format?(format)
    Formats.valid_format?(:out, format)
  end

end