perseus 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/Rakefile +6 -0
- data/bin/console +7 -0
- data/bin/setup +8 -0
- data/data/perseus-index-by-edition.json +71079 -0
- data/data/perseus-index-by-group.json +72957 -0
- data/data/perseus-index.xml +32861 -0
- data/exe/homer +89 -0
- data/exe/perseus +140 -0
- data/lib/perseus.rb +14 -0
- data/lib/perseus/constants.rb +7 -0
- data/lib/perseus/corpora.rb +17 -0
- data/lib/perseus/corpus_hash.rb +22 -0
- data/lib/perseus/corpus_references.rb +8 -0
- data/lib/perseus/cts_element.rb +19 -0
- data/lib/perseus/dictionary.rb +8 -0
- data/lib/perseus/file_index_xml.rb +12 -0
- data/lib/perseus/helpers.rb +62 -0
- data/lib/perseus/index_xml.rb +97 -0
- data/lib/perseus/network_index_xml.rb +15 -0
- data/lib/perseus/passage.rb +8 -0
- data/lib/perseus/version.rb +3 -0
- data/perseus.gemspec +33 -0
- metadata +198 -0
data/exe/homer
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#/ Usage: <homer> (-iliad or -odyssey) [options]...
|
3
|
+
#/ Fetch content from homers epics by book and line
|
4
|
+
# ** Tip: use #/ lines to define the --help usage message.
|
5
|
+
$stderr.sync = true
|
6
|
+
require 'optparse'
|
7
|
+
require 'pry'
|
8
|
+
require 'ox'
|
9
|
+
require 'awesome_print'
|
10
|
+
require 'roman-numerals'
|
11
|
+
require 'net/http'
|
12
|
+
require 'uri'
|
13
|
+
|
14
|
+
# Default options. ILIAD / ODYSSEY are the numeric suffixes interpolated into
# the work identifier of the CTS URN built by fetch_stanzas.
ILIAD = 1
ODYSSEY = 2
options = {
  title: nil,
  translate: false,
  language: "grc", # or "eng" or "lat"
  books: [1],      # e.g. [1, 21, 13]
  lines: [1]       # e.g. [1, 108, 257]
}

# Parse arguments.
# NOTE(review): OptionParser reads a multi-letter single-dash spec such as
# "-iliad" as the short switch -i followed by an argument placeholder. Confirm
# this is intended, and that "-lang" does not clash with "-l" (--line).
file = __FILE__
optparse = OptionParser.new do |opts|
  opts.on("-translate", "--translate") { options[:translate] = true }
  opts.on("-iliad", "--iliad") { options[:title] = ILIAD }
  opts.on("-odyssey", "--odyssey") { options[:title] = ODYSSEY }
  opts.on("-lang", "--language=val", String) { |val| options[:language] = val }
  opts.on("-b", "--book=val", Integer) { |val| options[:books] = [val] }
  opts.on("-l", "--line=val", Integer) { |val| options[:lines] = [val] }
  # The list argument is optional; keep the default when no value is given
  # instead of storing nil (which would crash the later iteration).
  opts.on("--books=[x,y,z]", Array) { |val| options[:books] = val if val }
  opts.on("--lines=[x,y,z]", Array) { |val| options[:lines] = val if val }
  opts.on_tail("-h", "--help") {
    # Print every "#/" line of this script minus its 3-character prefix.
    # Done in Ruby rather than via `grep | cut`, so the script path is never
    # interpolated into a shell command.
    File.foreach(file) { |text| puts text[3..-1] if text.start_with?("#/") }
    exit
  }
end
optparse.parse!

# A missing title is a usage error: report it and exit non-zero without a backtrace.
abort "Please provide the title of the work you are looking for -> odyssey or iliad".red if options[:title].nil?
|
58
|
+
|
59
|
+
# Issues an HTTP GET for +url+ and returns the response body.
# Being defined at the top level, this takes precedence over Kernel#open for
# the receiver-less calls made later in this script.
def open(url)
  target = URI.parse(url)
  Net::HTTP.get(target)
end
|
62
|
+
|
63
|
+
# Fetches and prints one passage per (book, line) combination.
#
# title    - numeric work suffix interpolated into the URN (ILIAD or ODYSSEY).
# language - language code interpolated into the URN (e.g. "grc").
# books    - enumerable of book numbers (Integers or numeric Strings).
# lines    - enumerable of line numbers (Integers or numeric Strings).
#
# Each passage is requested through `open`, parsed with Ox and printed as
# "<roman book>\t<line>: <content>". Problems while extracting or printing a
# passage are reported on stdout and the loop continues with the next line.
def fetch_stanzas(title, language, books, lines)
  ctsurl_pfx = "http://www.perseus.tufts.edu/hopper/CTS?request="
  books.each do |book|
    lines.each do |line|
      node_urn = "urn:cts:greekLit:tlg0012.tlg00#{title}.perseus-#{language}1:#{book}.#{line}"
      passage_url = "#{ctsurl_pfx}GetPassage&urn=#{node_urn}"
      passage_content = Ox.parse(open(passage_url))
      begin
        # The text sits six levels below the document root; prefer the second
        # child of that container and fall back to the first one.
        container = passage_content.nodes[0].nodes[1].nodes[0].nodes[0].nodes[0].nodes[0]
        line_content = container.nodes[1] || container.nodes[0]
        puts "#{RomanNumerals.to_roman(book.to_i).cyan}\t#{line.to_s.yellow}: #{line_content.green}"
      rescue NoMethodError => e
        # Calling `nodes` on a String means the walk hit a text node where an
        # element was expected; the original code treated exactly this case as
        # a missing passage. Inspecting name/receiver instead of the message
        # text keeps the check independent of the interpreter's wording.
        if e.name == :nodes && e.receiver.is_a?(String)
          puts "language: #{language}, book: #{RomanNumerals.to_roman(book.to_i)}, line: #{line} missing?".red
        else
          puts e
        end
      rescue StandardError => e
        # StandardError rather than Exception, so Ctrl-C and exit still work.
        puts e
      end
    end
  end
end
|
88
|
+
|
89
|
+
# Run the lookup with the parsed command-line options.
fetch_stanzas(*options.values_at(:title, :language, :books, :lines))
|
data/exe/perseus
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'optparse/time'
|
5
|
+
require 'ostruct'
|
6
|
+
require 'pp'
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'perseus'
|
9
|
+
require 'perseus/network_index_xml'
|
10
|
+
require 'perseus/file_index_xml'
|
11
|
+
|
12
|
+
# Command-line front end for the perseus executable: declares the switches,
# runs the immediate actions (index generation, word lookup, passage lookup)
# and collects the search options consumed by the script below the class.
class OptparseExample
  Version = Perseus::VERSION

  # Holds the parsed option values and registers every switch on a parser.
  class ScriptOptions
    attr_accessor :query,
                  :search_by,
                  :verbose

    def initialize
      self.query = nil
      self.search_by = "group"
      self.verbose = false
    end

    # Registers all switches on +parser+ (an OptionParser).
    def define_options(parser)
      parser.banner = "Usage: perseus [options]"
      parser.separator ""
      parser.separator "Specific options:"

      # add additional options
      generate_index(parser)
      search_a_word(parser)
      get_passage_valid_references(parser)
      get_passage(parser)
      search_by_title_option(parser)
      search_by_group_option(parser)
      boolean_verbose_option(parser)

      parser.separator ""
      parser.separator "Common options:"
      # No argument, shows at tail. This will print an options summary.
      parser.on_tail("-h", "--help", "Show this message") do
        puts parser
        exit
      end
      # Another typical switch to print the version.
      parser.on_tail("--version", "Show version") do
        puts Version
        exit
      end
    end

    # --generate_json: rebuilds the JSON indexes from the network or from the
    # local XML file, then exits. Nothing is generated when no source is given.
    def generate_index(parser)
      parser.on("--generate_json []", [:network, :local],
                "Generates the perseus indeces in json format choose fetch method (network -> download, save and index from network, local -> index from already downloaded xml file)") do |source|
        case source
        when :network then Perseus::NetworkIndexXML.new.generate_json_indeces
        when :local then Perseus::FileIndexXML.new.generate_json_indeces
        end
        exit
      end
    end

    # --word WORD: prints the dictionary entry as pretty JSON, then exits.
    def search_a_word(parser)
      parser.on("--word WORD", "Search ancient greek or latin word") do |word|
        w = Perseus::Dictionary.new(word)
        puts JSON.pretty_generate(w.to_h)
        exit
      end
    end

    # --valid_refs URN: prints the valid reference URNs for a passage. All of
    # them when the user answers "yes", otherwise only the first ten.
    def get_passage_valid_references(parser)
      parser.on("--valid_refs URN", "Get valid urn's to query passage content") do |urn|
        refs = Perseus::CorpusReferences.new(urn)
        # A reply with a single reference arrives as a String rather than an
        # Array, and a reply without references yields nil; Array() handles both.
        reference_links = Array(refs.to_h.dig("GetValidReff", "reply", "reff", "urn"))
        puts "You are about to get a list of valid references for this passage"
        puts "Are you sure you want the all printed on the screen?".red
        puts "Answering anything but yes will only print the first 10 references".yellow
        # Read from $stdin explicitly: Kernel#gets would try to read from files
        # named by any remaining ARGV entries. Strip the trailing newline so
        # that typing "yes" actually matches.
        answer = $stdin.gets.to_s.strip.downcase
        shown = answer == "yes" ? reference_links : reference_links.first(10)
        shown.each { |link| puts link.cyan }
        exit
      end
    end

    # --passage URN: prints the passage content as pretty JSON, then exits.
    def get_passage(parser)
      parser.on("--passage URN", "Get passage content for given urn") do |urn|
        passage = Perseus::Passage.new(urn)
        puts JSON.pretty_generate(passage.to_h)
        exit
      end
    end

    # --title TITLE: search the edition index by its "label" field.
    def search_by_title_option(parser)
      parser.on("--title TITLE", "Search by title") do |query|
        self.search_by = "label"
        self.query = query
      end
    end

    # --author NAME: search the edition index by its "groupname" field.
    def search_by_group_option(parser)
      parser.on("--author GROUPNAME/AUTHOR", "Search by groupname/author") do |query|
        self.search_by = "groupname"
        self.query = query
      end
    end

    def boolean_verbose_option(parser)
      # Boolean switch.
      parser.on("-v", "--[no-]verbose", "Run verbosely") do |v|
        self.verbose = v
      end
    end
  end

  # Parses +args+ in place and returns the populated ScriptOptions.
  # The parser is stored in @parser so that the `parser` reader declared
  # below returns it (it was previously stored under an unrelated name).
  def parse(args)
    @options = ScriptOptions.new
    @parser = OptionParser.new do |parser|
      @options.define_options(parser)
      parser.parse!(args)
    end
    @options
  end

  attr_reader :parser, :options
end
|
132
|
+
|
133
|
+
# Entry point: parse the command line and, when a search query was supplied,
# list every edition whose selected field contains the query text.
options = OptparseExample.new.parse(ARGV)
unless options.query.nil?
  matcher = lambda do |key, value, _object|
    key == options.search_by && value.include?(options.query)
  end
  texts = Perseus::Corpora.new.all.deep_locate(matcher)
  puts "Found #{texts.size} editions matching your query: #{options.query}"
  Perseus.print_editions(texts)
end
|
data/lib/perseus.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# perseus api
|
2
|
+
require 'pathname'
|
3
|
+
require "perseus/version"
|
4
|
+
require "perseus/constants"
|
5
|
+
require "perseus/helpers"
|
6
|
+
require "perseus/corpus_hash"
|
7
|
+
require 'perseus/corpora'
|
8
|
+
require 'perseus/corpus_references'
|
9
|
+
require 'perseus/dictionary'
|
10
|
+
require 'perseus/passage'
|
11
|
+
|
12
|
+
# Top-level namespace of the gem. The files required above reopen this
# module to add their constants, helpers and classes.
module Perseus
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# Shared constants: the CTS endpoint prefix and the locations of the index
# files shipped in the gem's data directory.
module Perseus
  # Prefix of every CTS request URL; the request name and parameters are appended.
  CTS_PFX = "http://www.perseus.tufts.edu/hopper/CTS?request=".freeze
  # Data directory two levels above this file's directory. Built from __dir__
  # so the path is absolute however the file was loaded.
  DATA_DIR = Pathname.new(__dir__).join("..", "..", "data").freeze
  CTS_XML_FILE = DATA_DIR.join("perseus-index.xml").to_s.freeze
  CTS_BY_GROUP_JSON_FILE = DATA_DIR.join("perseus-index-by-group.json").to_s.freeze
  ALL_EDITIONS_JSON = DATA_DIR.join("perseus-index-by-edition.json").to_s.freeze
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'hashie'
|
3
|
+
require 'perseus/corpus_hash'
|
4
|
+
|
5
|
+
module Perseus
  # In-memory view of the per-edition JSON index.
  class Corpora
    # Reads the edition index file, wraps each entry in a CorpusHash and
    # extends the resulting array with Hashie's DeepLocate.
    def initialize
      raw_entries = JSON.parse(File.read(Perseus::ALL_EDITIONS_JSON))
      @elements = raw_entries.map { |entry| Perseus::CorpusHash.new(entry) }
      @elements.extend(Hashie::Extensions::DeepLocate)
    end

    # Returns the array of wrapped index entries.
    def all
      @elements
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'hashie'
|
2
|
+
module Perseus
  # Hash subclass used for index entries. It mixes in Hashie's merge
  # initializer, indifferent access, method access and coercion extensions,
  # and recursively wraps nested hashes (including hashes inside arrays).
  class CorpusHash < Hash
    include Hashie::Extensions::MergeInitializer
    include Hashie::Extensions::IndifferentAccess
    include Hashie::Extensions::MethodAccess
    include Hashie::Extensions::Coercion
    coerce_value Hash, CorpusHash

    # hash - the source Hash whose pairs are copied into the new instance.
    def initialize(hash = {})
      super
      hash.each_pair do |key, value|
        self[key] =
          case value
          when Array
            # Only hash-like items are wrapped. Scalars inside an array (for
            # example a list of strings) are kept as they are instead of being
            # passed to CorpusHash.new, which cannot iterate them.
            value.map { |item| item.is_a?(Hash) ? CorpusHash.new(item) : item }
          when Hash
            CorpusHash.new(value)
          else
            value
          end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'active_support'
|
3
|
+
require 'active_support/core_ext'
|
4
|
+
require 'net/http'
|
5
|
+
require 'uri'
|
6
|
+
module Perseus
  # Base class for resources fetched from the CTS service. The response is
  # retrieved lazily and memoized in each representation.
  #
  # @urn is read here but never assigned in this class; presumably subclasses
  # set it to the full request URL (TODO confirm against the subclasses).
  class CTSElement
    # Raw response body of an HTTP GET on @urn (fetched once, then cached).
    def to_s
      @to_s ||= Net::HTTP.get(URI.parse(@urn))
    end

    # The XML response converted to a Hash via Hash.from_xml.
    def to_h
      @to_h ||= Hash.from_xml(to_s)
    end

    # JSON text of #to_h. Accepts the optional generator arguments that
    # JSON.generate passes to #to_json, so instances can be embedded in other
    # structures. Only the argument-less form is memoized.
    def to_json(*args)
      return to_h.to_json(*args) unless args.empty?

      @to_json ||= to_h.to_json
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Console printing helpers for index entries.
module Perseus
  # Prints one summary (plus description) per flattened edition entry.
  def self.print_editions editions
    editions.each do |edition|
      print_corpus_info edition.groupname, edition.type, edition.label, edition.urn, edition.language.green, edition.description
    end
  end

  # Prints every edition and translation of every work in +texts+ (entries
  # grouped by text group). A failure while printing one work is reported
  # together with the offending data and does not stop the listing.
  def self.print_corpus texts
    texts.each do |t|
      groupname = t.groupname
      t.work.each do |work|
        begin
          print_work_editions groupname, work
          print_work_translations groupname, work
        rescue StandardError => e
          # StandardError rather than Exception, so interrupts are not swallowed.
          puts "exception: #{e.message.red}"
          puts "We were working in group: #{groupname.cyan} with the following data point:".green
          puts work.inspect.yellow
        end
      end
    end
  end

  # Prints the edition(s) of one work. The language comes from the work itself.
  def self.print_work_editions groupname, work
    editions = work["edition"]
    return if editions.nil?

    language = work["xml:lang"]
    if editions.kind_of?(Array)
      editions.each do |edition|
        print_corpus_info groupname, "edition", edition.label, edition.urn, language.green, edition.description
      end
    else
      # A single edition is listed under the work's title. Its description is
      # passed along as well, matching the multi-edition branch.
      print_corpus_info groupname, "edition", work.title, editions.urn, language.green, editions.description
    end
  end
  private_class_method :print_work_editions

  # Prints the translation(s) of one work. Each translation carries its own language.
  def self.print_work_translations groupname, work
    translations = work["translation"]
    return if translations.nil?

    if translations.kind_of?(Array)
      translations.each do |translation|
        print_corpus_info groupname, "translation", translation.label, translation.urn, translation["xml:lang"].redish
      end
    else
      print_corpus_info groupname, "translation", work.title, translations.urn, translations["xml:lang"].redish
    end
  end
  private_class_method :print_work_translations

  # Prints "<group> - <title>: <urn>, <type>: <language>" and, when given,
  # the description on a second line.
  def self.print_corpus_info groupname, type, title, urn, language, description = nil
    puts "#{groupname} - #{title.purple}: #{urn.yellow}, #{type}: #{language}"
    puts description.cyan unless description.nil?
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'awesome_print'
|
2
|
+
require 'perseus/corpus_hash'
|
3
|
+
require 'perseus/cts_element'
|
4
|
+
module Perseus
  # Builds the two JSON indexes (by text group and by edition) from the
  # "TextInventory" document exposed through CTSElement#to_h.
  class IndexXML < CTSElement
    attr_reader :corpus_by_groupname, :corpus_by_edition

    # Memoized text-group structure (see #generate_structure_by_group).
    def by_groupname
      @corpus_by_groupname ||= generate_structure_by_group
    end

    # Memoized flat list of editions and translations.
    def by_edition
      @corpus_by_edition ||= generate_structure_by_edition
    end

    # Wraps every text group in a CorpusHash and normalizes its "work" entry:
    # a group with a single work exposes it as a Hash, which is wrapped into a
    # one-element array so that callers can always iterate over works.
    def generate_structure_by_group
      to_h["TextInventory"]["textgroup"].map do |text|
        group = CorpusHash.new(text)
        works = group.work
        group.work = [works] if works.is_a?(Hash) && !works.empty?
        group
      end
    end

    # Flattens the grouped structure into one CorpusHash per edition or
    # translation, each tagged with its group name, language and type.
    # Uses #by_groupname so it also works when that index has not been built
    # yet. A failure on one work is reported and the remaining works are
    # still processed.
    def generate_structure_by_edition
      new_corpus = []
      by_groupname.each do |group|
        groupname = group.groupname
        group.work.each do |work|
          begin
            # Editions inherit the language of the work.
            append_index_entries(new_corpus, groupname, work["edition"], "edition") do |_edition|
              work["xml:lang"]
            end
            # Translations carry their own language. They are tagged
            # "translation", as the printing helpers do, rather than "edition".
            append_index_entries(new_corpus, groupname, work["translation"], "translation") do |translation|
              translation["xml:lang"]
            end
          rescue StandardError => e
            # StandardError rather than Exception, so interrupts are not swallowed.
            puts "exception: #{e.message.red}"
            puts "We were working in group: #{groupname.cyan} with the following data point:".green
            puts work.inspect.yellow
          end
        end
      end
      new_corpus
    end

    # Writes both indexes as pretty-printed JSON to their configured files.
    def generate_json_indeces
      puts "Generating index by groupname"
      File.write(Perseus::CTS_BY_GROUP_JSON_FILE, JSON.pretty_generate(by_groupname))
      puts "Generating index by edition"
      File.write(Perseus::ALL_EDITIONS_JSON, JSON.pretty_generate(by_edition))
      puts "DONE".green
    end

    private

    # Appends one entry per item to +corpus+. +items+ may be nil (nothing is
    # added), a single hash or an array of hashes. The block receives each
    # item and returns its language.
    def append_index_entries(corpus, groupname, items, type)
      return if items.nil?

      list = items.kind_of?(Array) ? items : [items]
      list.each do |item|
        header = CorpusHash.new({
          groupname: groupname,
          language: yield(item),
          type: type,
        })
        corpus.push(header.merge(item))
      end
    end
  end
end
|