perseus 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/Rakefile +6 -0
- data/bin/console +7 -0
- data/bin/setup +8 -0
- data/data/perseus-index-by-edition.json +71079 -0
- data/data/perseus-index-by-group.json +72957 -0
- data/data/perseus-index.xml +32861 -0
- data/exe/homer +89 -0
- data/exe/perseus +140 -0
- data/lib/perseus.rb +14 -0
- data/lib/perseus/constants.rb +7 -0
- data/lib/perseus/corpora.rb +17 -0
- data/lib/perseus/corpus_hash.rb +22 -0
- data/lib/perseus/corpus_references.rb +8 -0
- data/lib/perseus/cts_element.rb +19 -0
- data/lib/perseus/dictionary.rb +8 -0
- data/lib/perseus/file_index_xml.rb +12 -0
- data/lib/perseus/helpers.rb +62 -0
- data/lib/perseus/index_xml.rb +97 -0
- data/lib/perseus/network_index_xml.rb +15 -0
- data/lib/perseus/passage.rb +8 -0
- data/lib/perseus/version.rb +3 -0
- data/perseus.gemspec +33 -0
- metadata +198 -0
data/exe/homer
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#/ Usage: <homer> (-iliad or -odyssey) [options]...
|
3
|
+
#/ Fetch content from homers epics by book and line
|
4
|
+
# ** Tip: use #/ lines to define the --help usage message.
|
5
|
+
$stderr.sync = true
|
6
|
+
require 'optparse'
|
7
|
+
require 'pry'
|
8
|
+
require 'ox'
|
9
|
+
require 'awesome_print'
|
10
|
+
require 'roman-numerals'
|
11
|
+
require 'net/http'
|
12
|
+
require 'uri'
|
13
|
+
|
14
|
+
# Work identifiers. fetch_stanzas interpolates the value into the CTS URN
# ("tlg00#{title}"), so 1 selects the Iliad and 2 the Odyssey.
ILIAD = 1
ODYSSEY = 2

# Default options; each command-line switch below overrides one entry.
options = {
  title: nil,        # ILIAD or ODYSSEY; must be supplied on the command line
  translate: false,
  language: "grc",   # or "eng" or "lat"
  books: [1],        # e.g. [1, 21, 13]
  lines: [1]         # e.g. [1, 108, 257]
}

# Parse arguments.
# NOTE(review): the single-dash multi-letter forms ("-translate", "-iliad",
# "-odyssey", "-lang") are probably treated by OptionParser as short switches
# rather than long ones, and "-lang" may clash with "-l" -- confirm.
file = __FILE__
optparse = OptionParser.new do |opts|
  opts.on("-translate", "--translate") { options[:translate] = true }
  opts.on("-iliad", "--iliad") { options[:title] = ILIAD }
  opts.on("-odyssey", "--odyssey") { options[:title] = ODYSSEY }
  opts.on("-lang", "--language=val", String) { |val| options[:language] = val }
  opts.on("-b", "--book=val", Integer) { |val| options[:books] = [val] }
  opts.on("-l", "--line=val", Integer) { |val| options[:lines] = [val] }
  # The list argument is declared optional ("[x,y,z]"), so the block receives
  # nil when the switch is given bare. Keep the current value in that case
  # instead of storing nil, which would later fail in fetch_stanzas.
  opts.on("--books=[x,y,z]", Array) { |val| options[:books] = val if val }
  opts.on("--lines=[x,y,z]", Array) { |val| options[:lines] = val if val }
  # --help prints the "#/" header lines of this file and replaces the process.
  opts.on_tail("-h", "--help") {
    exec "grep ^#/<'#{file}'|cut -c4-"
  }
end
optparse.parse!

# A missing title is a usage error: print the message and exit non-zero
# without a backtrace.
abort "Please provide the title of the work you are looking for -> odyssey or iliad".red if options[:title].nil?
|
58
|
+
|
59
|
+
# Issues an HTTP GET for +url+ and returns the response body.
# Defined at top level, so inside this script it is found before Kernel#open.
def open(url)
  target = URI.parse(url)
  Net::HTTP.get(target)
end
|
62
|
+
|
63
|
+
# Requests single lines of a Homeric epic from the Perseus CTS endpoint and
# prints each one as "<book in roman numerals>\t<line>: <text>".
#
# title    - work number interpolated into the URN ("tlg00#{title}")
# language - language code interpolated into the URN ("perseus-#{language}1")
# books    - enumerable of book numbers
# lines    - enumerable of line numbers; every line is requested for every book
#
# One GetPassage request is made per (book, line) pair. When the reply does
# not have the expected node structure, a "missing?" notice is printed for
# that pair and processing continues.
def fetch_stanzas(title, language, books, lines)
  ctsurl_pfx = "http://www.perseus.tufts.edu/hopper/CTS?request="
  books.each do |book|
    lines.each do |line|
      node_urn = "urn:cts:greekLit:tlg0012.tlg00#{title}.perseus-#{language}1:#{book}.#{line}"
      # Book component as it appears in the URN (text before the first ".").
      book_number = node_urn.split(":").last.split(".")[0].to_i
      passage_xml = open("#{ctsurl_pfx}GetPassage&urn=#{node_urn}")
      passage_content = Ox.parse(passage_xml)
      begin
        container = passage_content.nodes[0].nodes[1].nodes[0].nodes[0].nodes[0].nodes[0]
        # Prefer the second child; fall back to the first when there is only one.
        line_content = container.nodes[1] || container.nodes[0]
        puts "#{RomanNumerals.to_roman(book_number).cyan}\t#{line.to_s.yellow}: #{line_content.green}"
      rescue NoMethodError => e
        # Walking the tree hit something without #nodes (e.g. a bare text
        # node). Check the method name rather than the message text, whose
        # wording differs between Ruby versions.
        if e.name == :nodes
          puts "language: #{language}, book: #{RomanNumerals.to_roman(book_number)}, line: #{line} missing?".red
        else
          puts e
        end
      rescue StandardError => e
        # StandardError only: Interrupt and SystemExit must still propagate.
        puts e
      end
    end
  end
end
|
88
|
+
|
89
|
+
# Script entry point: print the requested lines using the parsed options.
fetch_stanzas options[:title], options[:language], options[:books], options[:lines]
|
data/exe/perseus
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'optparse/time'
|
5
|
+
require 'ostruct'
|
6
|
+
require 'pp'
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'perseus'
|
9
|
+
require 'perseus/network_index_xml'
|
10
|
+
require 'perseus/file_index_xml'
|
11
|
+
|
12
|
+
# Command-line front end of the `perseus` executable. #parse builds an
# OptionParser, lets ScriptOptions register every switch on it, parses the
# arguments and returns the populated ScriptOptions.
class OptparseExample
  Version = Perseus::VERSION

  # Holds the search settings and registers the switches. Several switches
  # (--generate_json, --word, --valid_refs, --passage, --help, --version) do
  # their work immediately and then exit the process.
  class ScriptOptions
    attr_accessor :query,
                  :search_by,
                  :verbose

    def initialize
      self.query = nil
      self.search_by = "group"
      self.verbose = false
    end

    # Registers all switches on +parser+, in the order they appear in --help.
    def define_options(parser)
      parser.banner = "Usage: perseus [options]"
      parser.separator ""
      parser.separator "Specific options:"

      # add additional options
      generate_index(parser)
      search_a_word(parser)
      get_passage_valid_references(parser)
      get_passage(parser)
      search_by_title_option(parser)
      search_by_group_option(parser)
      boolean_verbose_option(parser)

      parser.separator ""
      parser.separator "Common options:"
      # No argument, shows at tail. This prints an options summary.
      parser.on_tail("-h", "--help", "Show this message") do
        puts parser
        exit
      end
      # Another typical switch to print the version.
      parser.on_tail("--version", "Show version") do
        puts Version
        exit
      end
    end

    # --generate_json: rebuilds the JSON indexes from the chosen source
    # (:network or :local), then exits.
    def generate_index(parser)
      parser.on("--generate_json []", [:network, :local],
                "Generates the perseus indeces in json format choose fetch method (network -> download, save and index from network, local -> index from already downloaded xml file)") do |source|
        case source
        when :network then Perseus::NetworkIndexXML.new.generate_json_indeces
        when :local then Perseus::FileIndexXML.new.generate_json_indeces
        end
        exit
      end
    end

    # --word: prints the dictionary lookup for WORD as pretty JSON, then exits.
    def search_a_word(parser)
      parser.on("--word WORD", "Search ancient greek or latin word") do |word|
        w = Perseus::Dictionary.new(word)
        puts JSON.pretty_generate(w.to_h)
        exit
      end
    end

    # --valid_refs: lists the valid reference URNs for URN, then exits.
    # Prints the first 10 unless the user answers "yes" at the prompt.
    def get_passage_valid_references(parser)
      parser.on("--valid_refs URN", "Get valid urn's to query passage content") do |urn|
        refs = Perseus::CorpusReferences.new(urn)
        # Array() also covers a reply holding a single URN or none at all
        # (presumably a bare String / nil after XML-to-Hash conversion -- confirm).
        reference_links = Array(refs.to_h["GetValidReff"]["reply"]["reff"]["urn"])
        puts "You are about to get a list of valid references for this passage"
        puts "Are you sure you want the all printed on the screen?".red
        puts "Answering anything but yes will only print the first 10 references".yellow
        # Read from stdin explicitly: Kernel#gets reads ARGF, which would try to
        # open leftover command-line arguments as files. Strip the trailing
        # newline, otherwise the answer can never equal "yes".
        answer = $stdin.gets.to_s.strip
        shown = answer == "yes" ? reference_links : reference_links.first(10)
        shown.each { |link| puts link.cyan }
        exit
      end
    end

    # --passage: prints the passage content for URN as pretty JSON, then exits.
    def get_passage(parser)
      parser.on("--passage URN", "Get passage content for given urn") do |urn|
        passage = Perseus::Passage.new(urn)
        puts JSON.pretty_generate(passage.to_h)
        exit
      end
    end

    # --title: search the index on the "label" key.
    def search_by_title_option(parser)
      parser.on("--title TITLE", "Search by title") do |query|
        self.search_by = "label"
        self.query = query
      end
    end

    # --author: search the index on the "groupname" key.
    def search_by_group_option(parser)
      parser.on("--author GROUPNAME/AUTHOR", "Search by groupname/author") do |query|
        self.search_by = "groupname"
        self.query = query
      end
    end

    # -v / --[no-]verbose: boolean switch.
    def boolean_verbose_option(parser)
      parser.on("-v", "--[no-]verbose", "Run verbosely") do |v|
        self.verbose = v
      end
    end
  end

  # Parses +args+ in place (recognised switches are removed) and returns the
  # resulting ScriptOptions. The parser is kept in @parser so that the
  # `parser` reader declared below actually returns it.
  def parse(args)
    @options = ScriptOptions.new
    @parser = OptionParser.new do |parser|
      @options.define_options(parser)
    end
    @parser.parse!(args)
    @options
  end

  attr_reader :parser, :options
end
|
132
|
+
|
133
|
+
# Script entry point: parse the command line and, when --title or --author
# supplied a query, print every edition whose selected field contains it.
options = OptparseExample.new.parse(ARGV)
if options.query
  matcher = lambda do |key, value, _object|
    key == options.search_by && value.include?(options.query)
  end
  texts = Perseus::Corpora.new.all.deep_locate(matcher)
  puts "Found #{texts.size} editions matching your query: #{options.query}"
  Perseus.print_editions texts
end
|
data/lib/perseus.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# perseus api
|
2
|
+
require 'pathname'
|
3
|
+
require "perseus/version"
|
4
|
+
require "perseus/constants"
|
5
|
+
require "perseus/helpers"
|
6
|
+
require "perseus/corpus_hash"
|
7
|
+
require 'perseus/corpora'
|
8
|
+
require 'perseus/corpus_references'
|
9
|
+
require 'perseus/dictionary'
|
10
|
+
require 'perseus/passage'
|
11
|
+
|
12
|
+
# Root namespace of the gem. It is intentionally empty here; the components
# are defined in the files loaded by the require lines above.
module Perseus
|
13
|
+
# Extra or cool stuff here
|
14
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# DATA_DIR is a Pathname; require it here rather than relying on another file
# having loaded it first.
require 'pathname'

# Shared constants: the CTS request prefix and the locations of the bundled
# index files.
module Perseus
  # Prefix of every CTS request URL; callers append the request name and
  # its parameters.
  CTS_PFX = "http://www.perseus.tufts.edu/hopper/CTS?request=".freeze
  # "data" directory, resolved lexically relative to this file.
  DATA_DIR = Pathname.new(__FILE__).join("../../../data").freeze
  # XML text inventory.
  CTS_XML_FILE = "#{DATA_DIR}/perseus-index.xml".freeze
  # JSON index keyed by text group.
  CTS_BY_GROUP_JSON_FILE = "#{DATA_DIR}/perseus-index-by-group.json".freeze
  # JSON index with one entry per edition.
  ALL_EDITIONS_JSON = "#{DATA_DIR}/perseus-index-by-edition.json".freeze
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'hashie'
|
3
|
+
require 'perseus/corpus_hash'
|
4
|
+
|
5
|
+
module Perseus
  # In-memory view of the per-edition index stored at Perseus::ALL_EDITIONS_JSON.
  class Corpora
    # Reads and parses the JSON index, wraps every entry in a CorpusHash and
    # extends the resulting array with Hashie's DeepLocate.
    def initialize
      raw_entries = JSON.parse(File.read(Perseus::ALL_EDITIONS_JSON))
      @elements = raw_entries.map { |entry| Perseus::CorpusHash.new(entry) }
      @elements.extend(Hashie::Extensions::DeepLocate)
    end

    # Returns the array built by #initialize.
    def all
      @elements
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
module Perseus
  # Hash with indifferent and method-style access (via the Hashie extensions
  # below) whose nested hashes, including those inside arrays, are themselves
  # CorpusHash instances.
  class CorpusHash < Hash
    include Hashie::Extensions::MergeInitializer
    include Hashie::Extensions::IndifferentAccess
    include Hashie::Extensions::MethodAccess
    include Hashie::Extensions::Coercion
    coerce_value Hash, CorpusHash

    # hash - source Hash; every value is stored after nested hashes have been
    #        converted to CorpusHash.
    def initialize(hash = {})
      super
      hash.each_pair { |key, value| self[key] = wrap_nested(value) }
    end

    private

    # Converts hashes (at any depth inside arrays) to CorpusHash and returns
    # every other value unchanged. Array elements that are not hashes are
    # passed through instead of being handed to CorpusHash.new, which would
    # raise for strings or numbers.
    def wrap_nested(value)
      case value
      when Array then value.map { |item| wrap_nested(item) }
      when Hash then CorpusHash.new(value)
      else value
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'active_support'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
require 'net/http'
|
5
|
+
require 'uri'
|
6
|
+
module Perseus
  # Base class for a resource fetched over HTTP and exposed as raw XML, Hash
  # and JSON. Each representation is computed on first use and memoized.
  # The request URL is read from @urn, which this class never assigns
  # (presumably subclasses set it -- confirm).
  class CTSElement
    # Raw response body of a GET on @urn.
    def to_s
      @to_s ||= Net::HTTP.get(URI.parse(@urn))
    end

    # Response converted with ActiveSupport's Hash.from_xml.
    def to_h
      @to_h ||= Hash.from_xml(to_s)
    end

    # JSON form of #to_h. Accepts (and forwards) the generator arguments the
    # JSON library passes when this object is nested inside another structure
    # being serialized; only the argument-free form is memoized.
    def to_json(*args)
      return to_h.to_json(*args) unless args.empty?

      @to_json ||= to_h.to_json
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Perseus
  # Prints one summary (see print_corpus_info) per entry of +editions+.
  # Each entry must respond to groupname, type, label, urn, language and
  # description.
  def self.print_editions(editions)
    editions.each do |edition|
      print_corpus_info edition.groupname, edition.type, edition.label, edition.urn, edition.language.green, edition.description
    end
  end

  # Prints every edition and translation of every work in +texts+ (entries
  # with a groupname and a list of works). A failure while printing one work
  # is reported together with the offending data and does not stop the
  # remaining works.
  def self.print_corpus(texts)
    texts.each do |t|
      groupname = t.groupname
      t.work.each do |work|
        begin
          # Editions: either a list or a single entry.
          unless work["edition"].nil?
            language = work["xml:lang"]
            if work.edition.kind_of?(Array)
              work.edition.each do |edition|
                print_corpus_info groupname, "edition", edition.label, edition.urn, language.green, edition.description
              end
            else
              # Pass the description here as well, so a single edition is
              # printed the same way as each member of an edition list.
              print_corpus_info groupname, "edition", work.title, work.edition.urn, language.green, work.edition.description
            end
          end
          # Translations: either a list or a single entry.
          unless work["translation"].nil?
            if work.translation.kind_of?(Array)
              work.translation.each do |translation|
                print_corpus_info groupname, "translation", translation.label, translation.urn, translation["xml:lang"].redish
              end
            else
              print_corpus_info groupname, "translation", work.title, work.translation.urn, work.translation["xml:lang"].redish
            end
          end
        rescue StandardError => e
          # StandardError only: Interrupt and SystemExit must still propagate.
          puts "exception: #{e.message.red}"
          puts "We were working in group: #{groupname.cyan} with the following data point:".green
          puts work.inspect.yellow
        end
      end
    end
  end

  # Prints "<groupname> - <title>: <urn>, <type>: <language>" and, when given,
  # the description on a second line.
  def self.print_corpus_info(groupname, type, title, urn, language, description = nil)
    puts "#{groupname} - #{title.purple}: #{urn.yellow}, #{type}: #{language}"
    puts description.cyan unless description.nil?
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'awesome_print'
|
2
|
+
require 'perseus/corpus_hash'
|
3
|
+
require 'perseus/cts_element'
|
4
|
+
module Perseus
  # Builds two views of the text inventory returned by #to_h (inherited from
  # CTSElement): one entry per text group, and a flat list with one entry per
  # edition or translation. Both can be written out as JSON index files.
  class IndexXML < CTSElement
    attr_reader :corpus_by_groupname, :corpus_by_edition

    # Memoized per-group structure.
    def by_groupname
      @corpus_by_groupname ||= generate_structure_by_group
    end

    # Memoized flat per-edition structure.
    def by_edition
      @corpus_by_edition ||= generate_structure_by_edition
    end

    # Returns one CorpusHash per text group. When iterating a group's `work`
    # yields [key, value] pairs (i.e. `work` is a single hash rather than a
    # list of works), the pairs are collected into one CorpusHash and `work`
    # is replaced by a one-element list holding it, so that `work` can always
    # be iterated as a list of works.
    def generate_structure_by_group
      to_h["TextInventory"]["textgroup"].map do |text|
        group = CorpusHash.new text
        collected = CorpusHash.new
        group.work.each_with_index do |work, i|
          next unless work.kind_of?(Array)

          collected[work[0]] = work[1]
          # Swap in the rebuilt list once the last pair has been collected.
          group.work = [collected] if group.work.size - 1 == i
        end
        group
      end
    end

    # Returns a flat array with one CorpusHash per edition and per
    # translation; each carries groupname, language and type merged with the
    # entry's own fields. A work that fails is reported and skipped.
    def generate_structure_by_edition
      new_corpus = []
      # Go through by_groupname (not the bare reader) so this also works when
      # the per-group structure has not been built yet.
      by_groupname.each do |t|
        groupname = t.groupname
        t.work.each do |work|
          begin
            # Editions: either a list or a single entry.
            unless work["edition"].nil?
              editions = work.edition.kind_of?(Array) ? work.edition : [work.edition]
              editions.each do |edition|
                new_corpus.push(CorpusHash.new({
                  groupname: groupname,
                  language: work["xml:lang"],
                  type: "edition",
                }).merge(edition))
              end
            end
            # Translations: either a list or a single entry.
            unless work["translation"].nil?
              translations = work.translation.kind_of?(Array) ? work.translation : [work.translation]
              translations.each do |translation|
                # NOTE(review): translations are tagged type "edition" as in
                # the original code; "translation" may have been intended --
                # confirm before changing, since existing index files use it.
                new_corpus.push(CorpusHash.new({
                  groupname: groupname,
                  language: translation["xml:lang"],
                  type: "edition",
                }).merge(translation))
              end
            end
          rescue StandardError => e
            # StandardError only: Interrupt and SystemExit must still propagate.
            puts "exception: #{e.message.red}"
            puts "We were working in group: #{groupname.cyan} with the following data point:".green
            puts work.inspect.yellow
          end
        end
      end
      new_corpus
    end

    # Writes both structures as pretty-printed JSON to
    # Perseus::CTS_BY_GROUP_JSON_FILE and Perseus::ALL_EDITIONS_JSON.
    def generate_json_indeces
      puts "Generating index by groupname"
      File.write(Perseus::CTS_BY_GROUP_JSON_FILE, JSON.pretty_generate(by_groupname))
      puts "Generating index by edition"
      File.write(Perseus::ALL_EDITIONS_JSON, JSON.pretty_generate(by_edition))
      puts "DONE".green
    end
  end
end
|