perseus 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+ #/ Usage: <homer> (-iliad or -odyssey) [options]...
3
+ #/ Fetch content from Homer's epics by book and line
4
+ # ** Tip: use #/ lines to define the --help usage message.
5
+ $stderr.sync = true
6
+ require 'optparse'
7
+ require 'pry'
8
+ require 'ox'
9
+ require 'awesome_print'
10
+ require 'roman-numerals'
11
+ require 'net/http'
12
+ require 'uri'
13
+
14
# Work identifiers; the value is interpolated into the CTS URN ("tlg00<N>").
ILIAD = 1
ODYSSEY = 2

# Defaults used for every setting that is not overridden on the command line.
options = {
  title: nil,
  translate: false,
  language: "grc", # or "eng" or "lat"
  books: [1], # e.g. [1, 21, 13]
  lines: [1] # e.g. [1, 108, 257]
}

# Path of this script; --help greps it for the "#/" usage lines.
file = __FILE__

# NOTE(review): OptionParser appears to read a single-dash multi-letter spec
# such as "-iliad" as the short switch "-i" followed by an argument
# placeholder. Confirm the accepted spellings before touching these specs.
optparse = OptionParser.new do |opts|
  opts.on("-translate", "--translate") do
    options[:translate] = true
  end

  opts.on("-iliad", "--iliad") do
    options[:title] = ILIAD
  end

  opts.on("-odyssey", "--odyssey") do
    options[:title] = ODYSSEY
  end

  opts.on("-lang", "--language=val", String) do |lang|
    options[:language] = lang
  end

  # Single book / single line selectors.
  opts.on("-b", "--book=val", Integer) do |book|
    options[:books] = [book]
  end

  opts.on("-l", "--line=val", Integer) do |line|
    options[:lines] = [line]
  end

  # Comma-separated lists; OptionParser hands them over as arrays of strings.
  opts.on("--books=[x,y,z]", Array) do |book_list|
    options[:books] = book_list
  end

  opts.on("--lines=[x,y,z]", Array) do |line_list|
    options[:lines] = line_list
  end

  # Replaces the process with a grep/cut pipeline that prints the "#/" lines.
  opts.on_tail("-h", "--help") do
    exec "grep ^#/<'#{file}'|cut -c4-"
  end
end
optparse.parse!

# A work must be chosen explicitly; there is no default title.
if options[:title].nil?
  raise "Please provide the title of the work you are looking for -> odyssey or iliad".red
end
58
+
59
# Performs an HTTP GET for +url+ and returns the response body as a String.
#
# NOTE(review): being defined at the top level, this is what unqualified
# `open(...)` calls in this script resolve to instead of Kernel#open.
def open(url)
  target = URI.parse(url)
  Net::HTTP.get(target)
end
62
+
63
# Fetches and prints one verse for every book/line combination.
#
# For each pair a CTS GetPassage request is issued and the verse text is dug
# out of the parsed XML by position. A successful lookup prints
# "<roman book>\t<line>: <text>"; a lookup that fails while walking the XML
# prints a "missing?" note, any other error prints the error itself.
#
# @param title    [Integer] work number interpolated into the URN (ILIAD / ODYSSEY)
# @param language [String]  edition language code interpolated into the URN
# @param books    [Array]   book numbers (integers or numeric strings)
# @param lines    [Array]   line numbers (integers or numeric strings)
# @return [Array] +books+ (the result of the outer +each+)
def fetch_stanzas(title, language, books, lines)
  ctsurl_pfx = "http://www.perseus.tufts.edu/hopper/CTS?request="

  books.each do |book|
    lines.each do |line|
      node_urn = "urn:cts:greekLit:tlg0012.tlg00#{title}.perseus-#{language}1:#{book}.#{line}"
      passage_url = "#{ctsurl_pfx}GetPassage&urn=#{node_urn}"
      # Network and parse failures are deliberately not rescued here; they
      # abort the run exactly as before.
      passage_xml = open(passage_url)
      passage_content = Ox.parse(passage_xml)

      begin
        verse_parent = passage_content.nodes[0].nodes[1].nodes[0].nodes[0].nodes[0].nodes[0]
        # The text is normally the second child; fall back to the first one.
        line_content = verse_parent.nodes[1] || verse_parent.nodes[0]
        puts "#{RomanNumerals.to_roman(book.to_i).cyan}\t#{line.to_s.yellow}: #{line_content.green}"
      rescue StandardError => e
        # An error reply does not have the expected nesting, so the walk above
        # ends up calling #nodes on something that is not an element. Match on
        # the missing method's name instead of the exception message, whose
        # wording differs between Ruby versions.
        if e.is_a?(NoMethodError) && e.name == :nodes
          puts "language: #{language}, book: #{RomanNumerals.to_roman(book.to_i)}, line: #{line} missing?".red
        else
          puts e
        end
      end
    end
  end
end
88
+
89
+ fetch_stanzas options[:title], options[:language], options[:books], options[:lines]
@@ -0,0 +1,140 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'optparse/time'
5
+ require 'ostruct'
6
+ require 'pp'
7
+ require 'awesome_print'
8
+ require 'perseus'
9
+ require 'perseus/network_index_xml'
10
+ require 'perseus/file_index_xml'
11
+
12
# Command-line front end of the `perseus` executable.
#
# #parse builds an OptionParser, lets ScriptOptions register every switch on
# it, parses the given arguments and returns the populated ScriptOptions.
# Several switches (--generate_json, --word, --valid_refs, --passage, --help,
# --version) do their work inside the option callback and then exit.
class OptparseExample
  Version = Perseus::VERSION

  # Value holder for the search settings plus the switch definitions.
  class ScriptOptions
    attr_accessor :query,
                  :search_by,
                  :verbose

    def initialize
      self.query = nil
      self.search_by = "group"
      self.verbose = false
    end

    # Registers all switches on +parser+.
    #
    # @param parser [OptionParser]
    def define_options(parser)
      parser.banner = "Usage: perseus [options]"
      parser.separator ""
      parser.separator "Specific options:"

      # add additional options
      generate_index(parser)
      search_a_word(parser)
      get_passage_valid_references(parser)
      get_passage(parser)
      search_by_title_option(parser)
      search_by_group_option(parser)
      boolean_verbose_option(parser)

      parser.separator ""
      parser.separator "Common options:"
      # No argument, shows at tail. This will print an options summary.
      parser.on_tail("-h", "--help", "Show this message") do
        puts parser
        exit
      end
      # Another typical switch to print the version.
      parser.on_tail("--version", "Show version") do
        puts Version
        exit
      end
    end

    # --generate_json: rebuilds the JSON indexes from the network or from the
    # locally saved XML file, then exits.
    def generate_index(parser)
      parser.on("--generate_json []", [:network, :local],
                "Generates the perseus indeces in json format choose fetch method (network -> download, save and index from network, local -> index from already downloaded xml file)") do |source|
        Perseus::NetworkIndexXML.new.generate_json_indeces if source.eql? :network
        Perseus::FileIndexXML.new.generate_json_indeces if source.eql? :local
        exit
      end
    end

    # --word: prints the dictionary lookup for WORD as pretty JSON, then exits.
    def search_a_word(parser)
      parser.on("--word WORD", "Search ancient greek or latin word") do |word|
        w = Perseus::Dictionary.new(word)
        puts JSON.pretty_generate(w.to_h)
        exit
      end
    end

    # --valid_refs: lists the valid reference URNs for URN, then exits.
    # Everything is printed only when the user answers exactly "yes";
    # otherwise only the first 10 references are shown.
    def get_passage_valid_references(parser)
      parser.on("--valid_refs URN", "Get valid urn's to query passage content") do |urn|
        refs = Perseus::CorpusReferences.new(urn)
        reference_links = refs.to_h["GetValidReff"]["reply"]["reff"]["urn"]
        # NOTE(review): a reply with a single <urn> presumably arrives as a
        # bare value rather than an Array - normalise so iteration works.
        reference_links = [reference_links].compact unless reference_links.is_a?(Array)

        puts "You are about to get a list of valid references for this passage"
        puts "Are you sure you want the all printed on the screen?".red
        puts "Answering anything but yes will only print the first 10 references".yellow

        # Read from $stdin explicitly: bare `gets` goes through ARGF and would
        # treat any still-unparsed argument as a file name. The answer keeps
        # its trailing newline and is nil at EOF, hence to_s.strip.
        answer = $stdin.gets.to_s.strip
        shown = answer == "yes" ? reference_links : reference_links.first(10)
        shown.each { |link| puts link.cyan }
        exit
      end
    end

    # --passage: prints the passage for URN as pretty JSON, then exits.
    def get_passage(parser)
      parser.on("--passage URN", "Get passage content for given urn") do |urn|
        passage = Perseus::Passage.new(urn)
        puts JSON.pretty_generate(passage.to_h)
        exit
      end
    end

    # --title: search the edition index on the "label" key.
    def search_by_title_option(parser)
      parser.on("--title TITLE", "Search by title") do |query|
        self.search_by = "label"
        self.query = query
      end
    end

    # --author: search the edition index on the "groupname" key.
    def search_by_group_option(parser)
      parser.on("--author GROUPNAME/AUTHOR", "Search by groupname/author") do |query|
        self.search_by = "groupname"
        self.query = query
      end
    end

    def boolean_verbose_option(parser)
      # Boolean switch.
      parser.on("-v", "--[no-]verbose", "Run verbosely") do |v|
        self.verbose = v
      end
    end
  end

  # Parses +args+ destructively (recognised switches are removed).
  #
  # @param args [Array<String>] typically ARGV
  # @return [ScriptOptions] the populated options
  def parse(args)
    @options = ScriptOptions.new
    # Stored in @parser so that the `parser` reader below actually returns it.
    @parser = OptionParser.new do |parser|
      @options.define_options(parser)
      parser.parse!(args)
    end
    @options
  end

  attr_reader :parser, :options
end
132
+
133
# Parse the command line; switches that act immediately exit inside parse.
options = OptparseExample.new.parse(ARGV)

# Only search when --title or --author supplied a query.
if options.query
  # Matches entries whose selected key holds a value containing the query.
  matcher = lambda do |key, value, _object|
    key == options.search_by && value.include?(options.query)
  end
  texts = Perseus::Corpora.new.all.deep_locate(matcher)
  puts "Found #{texts.size} editions matching your query: #{options.query}"
  Perseus.print_editions texts
end
@@ -0,0 +1,14 @@
1
+ # perseus api
2
+ require 'pathname'
3
+ require "perseus/version"
4
+ require "perseus/constants"
5
+ require "perseus/helpers"
6
+ require "perseus/corpus_hash"
7
+ require 'perseus/corpora'
8
+ require 'perseus/corpus_references'
9
+ require 'perseus/dictionary'
10
+ require 'perseus/passage'
11
+
12
+ module Perseus
13
+ # Extra or cool stuff here
14
+ end
@@ -0,0 +1,7 @@
1
module Perseus
  # Base of every CTS request URL; the request name is appended directly.
  CTS_PFX = "http://www.perseus.tufts.edu/hopper/CTS?request="

  # Data directory, resolved three levels up from this file.
  DATA_DIR = Pathname.new(__FILE__).join("..", "..", "..", "data")

  # Files kept inside DATA_DIR.
  CTS_XML_FILE = DATA_DIR.to_s + "/perseus-index.xml"
  CTS_BY_GROUP_JSON_FILE = DATA_DIR.to_s + "/perseus-index-by-group.json"
  ALL_EDITIONS_JSON = DATA_DIR.to_s + "/perseus-index-by-edition.json"
end
@@ -0,0 +1,17 @@
1
+ require 'json'
2
+ require 'hashie'
3
+ require 'perseus/corpus_hash'
4
+
5
module Perseus
  # In-memory view of the edition index stored in ALL_EDITIONS_JSON.
  class Corpora
    # Reads the JSON index, wraps every entry in a CorpusHash and makes the
    # resulting array searchable with Hashie's deep_locate.
    def initialize
      raw_entries = JSON.parse(File.read(Perseus::ALL_EDITIONS_JSON))
      @elements = raw_entries.map { |entry| Perseus::CorpusHash.new(entry) }
      @elements.extend(Hashie::Extensions::DeepLocate)
    end

    # @return [Array<Perseus::CorpusHash>] every entry of the index
    def all
      @elements
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'hashie'
2
module Perseus
  # Hash with indifferent and method-style access whose nested hashes are
  # converted to CorpusHash as well, including hashes held inside arrays.
  class CorpusHash < Hash
    include Hashie::Extensions::MergeInitializer
    include Hashie::Extensions::IndifferentAccess
    include Hashie::Extensions::MethodAccess
    include Hashie::Extensions::Coercion
    coerce_value Hash, CorpusHash

    # @param hash [Hash] initial content; nested hashes are wrapped recursively
    def initialize(hash = {})
      super
      hash.each_pair do |key, value|
        self[key] =
          case value
          when Array
            # Wrap only the hash members: arrays may also carry plain values
            # (e.g. strings), which cannot be turned into a CorpusHash.
            value.map { |item| item.is_a?(Hash) ? CorpusHash.new(item) : item }
          when Hash
            CorpusHash.new(value)
          else
            value
          end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'perseus/cts_element'
2
module Perseus
  # CTS element backed by a GetValidReff request.
  class CorpusReferences < CTSElement
    # @param urn [#to_s] URN whose valid references are requested
    def initialize(urn)
      @urn = format("%sGetValidReff&urn=%s", CTS_PFX, urn)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'json'
2
+ require 'active_support'
3
+ require 'active_support/core_ext'
4
+ require 'net/http'
5
+ require 'uri'
6
module Perseus
  # Base class for resources identified by the URL stored in @urn (set by the
  # subclass). Each representation is computed on first use and memoised.
  class CTSElement
    # @return [String] the raw response body of an HTTP GET on @urn
    def to_s
      @to_s ||= Net::HTTP.get(URI.parse(@urn))
    end

    # @return [Hash] the response parsed with Hash.from_xml
    def to_h
      @to_h ||= Hash.from_xml(to_s)
    end

    # Accepts (and forwards) the optional arguments JSON generators pass to
    # #to_json, so an element can also be serialised as part of a larger
    # structure. Only the plain, argument-less form is memoised.
    #
    # @return [String] JSON representation of #to_h
    def to_json(*args)
      return to_h.to_json(*args) unless args.empty?

      @to_json ||= to_h.to_json
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'perseus/cts_element'
2
module Perseus
  # CTS element backed by the Perseus "xmlmorph" word lookup.
  class Dictionary < CTSElement
    # @param word [String] word to look up; it is form-encoded so that
    #   non-ASCII text and reserved characters yield a URL URI.parse accepts
    # @param lang [String] value of the "lang" query parameter; defaults to
    #   the previously hard-coded "greek"
    #   NOTE(review): other accepted values (e.g. for Latin) are not visible
    #   here - confirm against the service before relying on them.
    def initialize(word, lang: "greek")
      query = URI.encode_www_form(lang: lang, lookup: word)
      @urn = "http://www.perseus.tufts.edu/hopper/xmlmorph?#{query}"
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'perseus/index_xml'
2
module Perseus
  # Index source that takes its XML from the locally saved CTS_XML_FILE.
  class FileIndexXML < IndexXML
    def initialize
      puts "Reading from locally saved xml"
      @urn = CTS_XML_FILE
    end

    # @return [String] the file content, read once and then reused
    def to_s
      return @to_s if @to_s

      @to_s = File.read(@urn)
    end
  end
end
@@ -0,0 +1,62 @@
1
module Perseus
  # Prints one summary per entry of the flat edition index.
  #
  # @param editions [Enumerable] entries answering groupname, type, label,
  #   urn, language and description
  def self.print_editions(editions)
    editions.each do |edition|
      print_corpus_info edition.groupname, edition.type, edition.label, edition.urn,
                        edition.language.green, edition.description
    end
  end

  # Prints every edition and translation of every work in +texts+.
  # A failure while printing one work is reported together with the offending
  # data and does not stop the remaining works.
  #
  # @param texts [Enumerable] text groups answering groupname and work
  def self.print_corpus(texts)
    texts.each do |text|
      groupname = text.groupname
      text.work.each do |work|
        begin
          print_work_editions(groupname, work) unless work["edition"].nil?
          print_work_translations(groupname, work) unless work["translation"].nil?
        rescue StandardError => e
          # StandardError only: interrupts and exit requests must get through.
          puts "exception: #{e.message.red}"
          puts "We were working in group: #{groupname.cyan} with the following data point:".green
          puts work.inspect.yellow
        end
      end
    end
  end

  # Prints the summary line, followed by the description when one is given.
  def self.print_corpus_info(groupname, type, title, urn, language, description = nil)
    puts "#{groupname} - #{title.purple}: #{urn.yellow}, #{type}: #{language}"
    puts description.cyan unless description.nil?
  end

  # Prints the edition(s) of one work; the work holds either one edition or
  # an array of them. The description is printed in both cases.
  def self.print_work_editions(groupname, work)
    language = work["xml:lang"]
    if work.edition.kind_of?(Array)
      work.edition.each do |edition|
        print_corpus_info groupname, "edition", edition.label, edition.urn,
                          language.green, edition.description
      end
    else
      print_corpus_info groupname, "edition", work.title, work.edition.urn,
                        language.green, work.edition.description
    end
  end
  private_class_method :print_work_editions

  # Prints the translation(s) of one work; each translation carries its own
  # language.
  def self.print_work_translations(groupname, work)
    if work.translation.kind_of?(Array)
      work.translation.each do |translation|
        print_corpus_info groupname, "translation", translation.label, translation.urn,
                          translation["xml:lang"].redish
      end
    else
      print_corpus_info groupname, "translation", work.title, work.translation.urn,
                        work.translation["xml:lang"].redish
    end
  end
  private_class_method :print_work_translations
end
@@ -0,0 +1,97 @@
1
+ require 'awesome_print'
2
+ require 'perseus/corpus_hash'
3
+ require 'perseus/cts_element'
4
module Perseus
  # Turns the text inventory XML (exposed through #to_h) into two structures:
  # one entry per text group, and a flat list with one entry per
  # edition/translation. Both can be written out as JSON index files.
  class IndexXML < CTSElement
    attr_reader :corpus_by_groupname, :corpus_by_edition

    # @return [Array<CorpusHash>] text groups, built on first use
    def by_groupname
      @corpus_by_groupname ||= generate_structure_by_group
    end

    # @return [Array<CorpusHash>] flat edition/translation list, built on first use
    def by_edition
      @corpus_by_edition ||= generate_structure_by_edition
    end

    # Wraps every text group in a CorpusHash and normalises its "work" value:
    # a group whose work is a single hash gets it wrapped in a one-element
    # array, so consumers can always iterate over a list of works.
    def generate_structure_by_group
      groups = to_h["TextInventory"]["textgroup"].map { |text| CorpusHash.new text }
      groups.each do |group|
        works = group.work
        group.work = [works] if works.is_a?(Hash) && !works.empty?
      end
    end

    # Flattens the grouped structure into one record per edition and per
    # translation. Each record carries groupname, language and type, merged
    # with the fields of the edition/translation itself. Editions take the
    # language of their work; translations carry their own.
    def generate_structure_by_edition
      new_corpus = []
      # Go through by_groupname (not the bare reader) so the grouped structure
      # is built on demand instead of being nil on a fresh instance.
      by_groupname.each do |group|
        groupname = group.groupname
        Array(group.work).each do |work|
          begin
            unless work["edition"].nil?
              one_or_many(work.edition).each do |edition|
                new_corpus.push(corpus_record(groupname, work["xml:lang"], "edition", edition))
              end
            end
            unless work["translation"].nil?
              one_or_many(work.translation).each do |translation|
                # Tagged "translation" so the two kinds can be told apart.
                new_corpus.push(corpus_record(groupname, translation["xml:lang"], "translation", translation))
              end
            end
          rescue StandardError => e
            # Report the offending work and continue with the next one.
            puts "exception: #{e.message.red}"
            puts "We were working in group: #{groupname.cyan} with the following data point:".green
            puts work.inspect.yellow
          end
        end
      end
      new_corpus
    end

    # Writes both structures as pretty-printed JSON to the index files.
    def generate_json_indeces
      puts "Generating index by groupname"
      File.write(Perseus::CTS_BY_GROUP_JSON_FILE, JSON.pretty_generate(by_groupname))
      puts "Generating index by edition"
      File.write(Perseus::ALL_EDITIONS_JSON, JSON.pretty_generate(by_edition))
      puts "DONE".green
    end

    private

    # A work holds either a single entry or an array of entries.
    def one_or_many(value)
      value.kind_of?(Array) ? value : [value]
    end

    # Builds one flat index record; fields of +source+ win on key clashes.
    def corpus_record(groupname, language, type, source)
      CorpusHash.new({
        groupname: groupname,
        language: language,
        type: type,
      }).merge(source)
    end
  end
end