perseus 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+ #/ Usage: <homer> (-iliad or -odyssey) [options]...
3
+ #/ Fetch content from Homer's epics by book and line
4
+ # ** Tip: use #/ lines to define the --help usage message.
5
+ $stderr.sync = true
6
+ require 'optparse'
7
+ require 'pry'
8
+ require 'ox'
9
+ require 'awesome_print'
10
+ require 'roman-numerals'
11
+ require 'net/http'
12
+ require 'uri'
13
+
14
# Work identifiers: the chosen value is interpolated into the passage URN
# right after "tlg00" when fetch_stanzas builds its request.
ILIAD = 1
ODYSSEY = 2

# Default options; each one can be overridden by a command-line switch below.
options = {
  title: nil,
  translate: false,
  language: "grc", # or "eng" or "lat"
  books: [1],      # e.g. [1, 21, 13]
  lines: [1]       # e.g. [1, 108, 257]
}

# Parse arguments.
# NOTE(review): OptionParser appears to read a single-dash spec longer than
# one letter (e.g. "-iliad") as a short switch plus an argument placeholder.
# The specs are kept exactly as they were so the existing command line keeps
# working -- confirm the intended CLI before changing them.
file = __FILE__
optparse = OptionParser.new do |opts|
  opts.on("-translate", "--translate") { options[:translate] = true }
  opts.on("-iliad", "--iliad") { options[:title] = ILIAD }
  opts.on("-odyssey", "--odyssey") { options[:title] = ODYSSEY }
  opts.on("-lang", "--language=val", String) { |val| options[:language] = val }
  opts.on("-b", "--book=val", Integer) { |val| options[:books] = [val] }
  opts.on("-l", "--line=val", Integer) { |val| options[:lines] = [val] }
  # The bracketed placeholder makes the value optional, so val is nil when the
  # switch is given bare; keep the default list in that case instead of
  # storing nil, which fetch_stanzas cannot iterate.
  opts.on("--books=[x,y,z]", Array) { |val| options[:books] = val if val }
  opts.on("--lines=[x,y,z]", Array) { |val| options[:lines] = val if val }
  opts.on_tail("-h", "--help") do
    # Print every "#/" usage line of this script without its 3-character
    # prefix (done in Ruby so no external grep/cut or shell quoting is needed).
    File.foreach(file) { |text| puts text[3..-1] if text.start_with?("#/") }
    exit
  end
end

# Report malformed switches as a one-line message instead of a backtrace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  abort e.message
end

# A title is mandatory; exit with status 1 and the explanation on stderr.
abort "Please provide the title of the work you are looking for -> odyssey or iliad".red if options[:title].nil?
58
+
59
# Fetches +url+ over HTTP and returns the response body as a String.
#
# A top-level +open+ shadows Kernel#open for every object in the process, so
# any call that is not a bare http(s) URL string (extra arguments, keyword
# options, a block, a file path, ...) is handed to Kernel#open unchanged.
#
# url - String URL to fetch (or whatever Kernel#open accepts, see above)
#
# Returns the response body, or Kernel#open's result for non-URL calls.
def open(url, *args, **opts, &block)
  plain_http_url = args.empty? && opts.empty? && block.nil? &&
                   url.is_a?(String) && url =~ %r{\Ahttps?://}i
  return super unless plain_http_url

  Net::HTTP.get(URI.parse(url))
end
62
+
63
# Prints the requested lines of one of Homer's epics, fetching each passage
# (book.line) from the Perseus CTS service and writing it to stdout.
#
# title    - work number interpolated into the URN right after "tlg00"
# language - language code interpolated into the URN (e.g. "grc")
# books    - enumerable of book numbers
# lines    - enumerable of line numbers; every line is requested for every book
#
# Errors raised while fetching or parsing a passage propagate to the caller;
# errors raised while digging the text out of the parsed reply are reported
# on stdout and the loop moves on to the next line.
def fetch_stanzas title, language, books, lines
  ctsurl_pfx = "http://www.perseus.tufts.edu/hopper/CTS?request="

  books.each do |book|
    lines.each do |line|
      node_urn = "urn:cts:greekLit:tlg0012.tlg00#{title}.perseus-#{language}1:#{book}.#{line}"
      passage_xml = open("#{ctsurl_pfx}GetPassage&urn=#{node_urn}")
      passage_content = Ox.parse(passage_xml)

      begin
        # The text sits at a fixed depth of the reply; it is the second child
        # of that node when present, otherwise the first.
        text_nodes = passage_content.nodes[0].nodes[1].nodes[0].nodes[0].nodes[0].nodes[0].nodes
        line_content = text_nodes[1] || text_nodes[0]
        puts "#{RomanNumerals.to_roman(book.to_i).cyan}\t#{line.to_s.yellow}: #{line_content.green}"
      rescue NoMethodError => e
        # The walk above hit the String "3" where a node was expected
        # (presumably an error code in the service reply -- TODO confirm).
        # Checked via name/receiver because the message text differs between
        # Ruby versions.
        if e.name == :nodes && e.receiver == "3"
          puts "language: #{language}, book: #{RomanNumerals.to_roman(book.to_i)}, line: #{line} missing?".red
        else
          puts e
        end
      rescue StandardError => e
        puts e
      end
    end
  end
end
88
+
89
# Entry point: fetch and print the passages selected on the command line.
fetch_stanzas(*options.values_at(:title, :language, :books, :lines))
@@ -0,0 +1,140 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'optparse/time'
5
+ require 'ostruct'
6
+ require 'pp'
7
+ require 'awesome_print'
8
+ require 'perseus'
9
+ require 'perseus/network_index_xml'
10
+ require 'perseus/file_index_xml'
11
+
12
# Command-line front end of the perseus executable: declares the switches and
# parses an argument list into a ScriptOptions value.
class OptparseExample
  Version = Perseus::VERSION

  # Search settings collected from the command line, plus the registration of
  # every switch on the parser. Several switches act immediately and exit.
  class ScriptOptions
    attr_accessor :query,
                  :search_by,
                  :verbose

    def initialize
      self.query = nil
      self.search_by = "group"
      self.verbose = false
    end

    # Registers the banner, all specific switches and the common
    # --help / --version switches on +parser+.
    def define_options(parser)
      parser.banner = "Usage: perseus [options]"
      parser.separator ""
      parser.separator "Specific options:"

      # add additional options
      generate_index(parser)
      search_a_word(parser)
      get_passage_valid_references(parser)
      get_passage(parser)
      search_by_title_option(parser)
      search_by_group_option(parser)
      boolean_verbose_option(parser)

      parser.separator ""
      parser.separator "Common options:"
      # No argument, shows at tail. This will print an options summary.
      parser.on_tail("-h", "--help", "Show this message") do
        puts parser
        exit
      end
      # Another typical switch to print the version.
      parser.on_tail("--version", "Show version") do
        puts Version
        exit
      end
    end

    # --generate_json: rebuilds the JSON indexes from the network or from the
    # locally saved XML, then exits. Any other value just exits.
    def generate_index(parser)
      parser.on("--generate_json []", [:network, :local],
                "Generates the perseus indeces in json format choose fetch method (network -> download, save and index from network, local -> index from already downloaded xml file)") do |source|
        case source
        when :network then Perseus::NetworkIndexXML.new.generate_json_indeces
        when :local then Perseus::FileIndexXML.new.generate_json_indeces
        end
        exit
      end
    end

    # --word: prints the dictionary lookup of WORD as pretty JSON and exits.
    def search_a_word(parser)
      parser.on("--word WORD", "Search ancient greek or latin word") do |word|
        entry = Perseus::Dictionary.new(word)
        puts JSON.pretty_generate(entry.to_h)
        exit
      end
    end

    # --valid_refs: prints the valid reference URNs for URN and exits. All of
    # them are printed only when the user answers "yes"; otherwise the first 10.
    def get_passage_valid_references(parser)
      parser.on("--valid_refs URN", "Get valid urn's to query passage content") do |urn|
        refs = Perseus::CorpusReferences.new(urn)
        # Array() also covers a reply holding a single reference string.
        reference_links = Array(refs.to_h["GetValidReff"]["reply"]["reff"]["urn"])
        puts "You are about to get a list of valid references for this passage"
        puts "Are you sure you want the all printed on the screen?".red
        puts "Answering anything but yes will only print the first 10 references".yellow
        # Read from stdin explicitly (Kernel#gets would read from files named
        # in ARGV) and strip the trailing newline before comparing.
        answer = $stdin.gets.to_s.strip
        selected = answer == "yes" ? reference_links : reference_links.first(10)
        selected.each { |link| puts link.cyan }
        exit
      end
    end

    # --passage: prints the passage for URN as pretty JSON and exits.
    def get_passage(parser)
      parser.on("--passage URN", "Get passage content for given urn") do |urn|
        passage = Perseus::Passage.new(urn)
        puts JSON.pretty_generate(passage.to_h)
        exit
      end
    end

    # --title: search the index on the "label" key.
    def search_by_title_option(parser)
      parser.on("--title TITLE", "Search by title") do |query|
        self.search_by = "label"
        self.query = query
      end
    end

    # --author: search the index on the "groupname" key.
    def search_by_group_option(parser)
      parser.on("--author GROUPNAME/AUTHOR", "Search by groupname/author") do |query|
        self.search_by = "groupname"
        self.query = query
      end
    end

    def boolean_verbose_option(parser)
      # Boolean switch.
      parser.on("-v", "--[no-]verbose", "Run verbosely") do |v|
        self.verbose = v
      end
    end
  end

  # Parses +args+ in place (recognised switches are removed from it).
  #
  # Returns the populated ScriptOptions.
  def parse(args)
    @options = ScriptOptions.new
    # @args is kept for compatibility; @parser backs the public reader below,
    # which previously always returned nil.
    @parser = @args = OptionParser.new do |parser|
      @options.define_options(parser)
      parser.parse!(args)
    end
    @options
  end

  attr_reader :parser, :options
end
132
+
133
# Parse the command line; switches that act immediately exit during parsing.
options = OptparseExample.new.parse(ARGV)

# A query is only present when --title or --author was given.
unless options.query.nil?
  # Matches entries whose searched key contains the query text.
  matcher = lambda do |key, value, object|
    key == options.search_by && value.include?(options.query)
  end
  texts = Perseus::Corpora.new.all.deep_locate(matcher)
  puts "Found #{texts.size} editions matching your query: #{options.query}"
  Perseus.print_editions(texts)
end
@@ -0,0 +1,14 @@
1
+ # perseus api
2
+ require 'pathname'
3
+ require "perseus/version"
4
+ require "perseus/constants"
5
+ require "perseus/helpers"
6
+ require "perseus/corpus_hash"
7
+ require 'perseus/corpora'
8
+ require 'perseus/corpus_references'
9
+ require 'perseus/dictionary'
10
+ require 'perseus/passage'
11
+
12
# Namespace of the perseus api; the files required above fill it in.
# Intentionally empty here: a place for extras.
module Perseus
end
@@ -0,0 +1,7 @@
1
module Perseus
  # Prefix shared by every CTS request URL; the request name is appended to it.
  CTS_PFX = "http://www.perseus.tufts.edu/hopper/CTS?request="

  # Data directory, resolved relative to this file.
  DATA_DIR = Pathname.new(__FILE__).join("../../../data")

  # Files kept inside DATA_DIR: the saved XML index and the two JSON indexes.
  CTS_XML_FILE = File.join(DATA_DIR.to_s, "perseus-index.xml")
  CTS_BY_GROUP_JSON_FILE = File.join(DATA_DIR.to_s, "perseus-index-by-group.json")
  ALL_EDITIONS_JSON = File.join(DATA_DIR.to_s, "perseus-index-by-edition.json")
end
@@ -0,0 +1,17 @@
1
+ require 'json'
2
+ require 'hashie'
3
+ require 'perseus/corpus_hash'
4
+
5
module Perseus
  # The by-edition JSON index loaded into memory.
  class Corpora
    # Reads ALL_EDITIONS_JSON, wraps every entry in a CorpusHash and makes the
    # resulting list searchable with Hashie's deep_locate.
    def initialize
      raw_entries = JSON.parse(File.read(Perseus::ALL_EDITIONS_JSON))
      @elements = raw_entries.map { |entry| Perseus::CorpusHash.new(entry) }
      @elements.extend(Hashie::Extensions::DeepLocate)
    end

    # Returns the list built by the constructor.
    def all
      @elements
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'hashie'
2
module Perseus
  # Hash with indifferent and method-style access whose nested hashes (also
  # those inside arrays) are converted to CorpusHash on construction.
  class CorpusHash < Hash
    include Hashie::Extensions::MergeInitializer
    include Hashie::Extensions::IndifferentAccess
    include Hashie::Extensions::MethodAccess
    include Hashie::Extensions::Coercion
    coerce_value Hash, CorpusHash

    # hash - source Hash whose pairs are copied into the new instance
    def initialize(hash = {})
      super
      hash.each_pair { |key, value| self[key] = wrap_value(value) }
    end

    private

    # Converts nested hashes to CorpusHash, walking into arrays. Anything else
    # (strings, numbers, nil, ...) is returned untouched; array elements that
    # are not hashes used to be passed to CorpusHash.new and crash there.
    def wrap_value(value)
      case value
      when Array then value.map { |item| wrap_value(item) }
      when Hash then CorpusHash.new(value)
      else value
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'perseus/cts_element'
2
module Perseus
  # CTS "GetValidReff" request for a single urn.
  class CorpusReferences < CTSElement
    # urn - value appended to the request as its urn parameter
    def initialize(urn)
      request = "GetValidReff&urn=#{urn}"
      @urn = "#{CTS_PFX}#{request}"
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'json'
2
+ require 'active_support'
3
+ require 'active_support/core_ext'
4
+ require 'net/http'
5
+ require 'uri'
6
module Perseus
  # Base class for remote documents. Subclasses set @urn to the address to
  # fetch; the body and its conversions are computed once and then reused.
  class CTSElement
    # Returns the body fetched from @urn over HTTP.
    def to_s
      @to_s ||= Net::HTTP.get(URI.parse(@urn))
    end

    # Returns the body parsed from XML into a Hash.
    def to_h
      @to_h ||= Hash.from_xml(to_s)
    end

    # Returns the parsed body as a JSON string.
    #
    # Accepts the optional arguments JSON generators hand to #to_json, so an
    # element nested inside another structure can be serialised too; only the
    # plain, argument-less form is cached.
    def to_json(*args)
      return to_h.to_json(*args) unless args.empty?

      @to_json ||= to_h.to_json
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require 'perseus/cts_element'
2
module Perseus
  # Morphology lookup of a single word against the Perseus xmlmorph service.
  class Dictionary < CTSElement
    # word - the word to look up. It is form-encoded before being placed in
    #        the query string, so non-ASCII input (e.g. Greek script) and
    #        reserved characters no longer make URI.parse reject the address.
    def initialize(word)
      lookup = URI.encode_www_form_component(word)
      @urn = "http://www.perseus.tufts.edu/hopper/xmlmorph?lang=greek&lookup=#{lookup}"
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'perseus/index_xml'
2
module Perseus
  # Index source backed by the XML file already saved at CTS_XML_FILE.
  class FileIndexXML < IndexXML
    # Announces the source on stdout and points @urn at the saved file.
    def initialize
      $stdout.puts "Reading from locally saved xml"
      @urn = CTS_XML_FILE
    end

    # Returns the file content, read on first use and cached afterwards.
    def to_s
      return @to_s if @to_s

      @to_s = File.read(@urn)
    end
  end
end
@@ -0,0 +1,62 @@
1
module Perseus
  # Prints one summary per entry of the by-edition index.
  #
  # editions - enumerable of entries responding to groupname, type, label,
  #            urn, language and description
  def self.print_editions(editions)
    editions.each do |edition|
      print_corpus_info edition.groupname, edition.type, edition.label, edition.urn, edition.language.green, edition.description
    end
  end

  # Prints every edition and translation of every work of the given text
  # groups. A work that cannot be printed is reported on stdout and skipped.
  #
  # texts - enumerable of groups responding to groupname and work
  def self.print_corpus(texts)
    texts.each do |t|
      groupname = t.groupname
      t.work.each do |work|
        begin
          print_work_editions groupname, work
          print_work_translations groupname, work
        rescue StandardError => e
          puts "exception: #{e.message.red}"
          puts "We were working in group: #{groupname.cyan} with the following data point:".green
          puts work.inspect.yellow
        end
      end
    end
  end

  # Prints a single "group - title: urn, type: language" line, followed by
  # the description on its own line when one is given.
  def self.print_corpus_info(groupname, type, title, urn, language, description = nil)
    puts "#{groupname} - #{title.purple}: #{urn.yellow}, #{type}: #{language}"
    puts description.cyan unless description.nil?
  end

  # Prints the edition(s) of one work; the language comes from the work.
  def self.print_work_editions(groupname, work)
    return if work["edition"].nil?

    language = work["xml:lang"]
    if work.edition.kind_of?(Array)
      # Many editions: each one carries its own label.
      work.edition.each do |edition|
        print_corpus_info groupname, "edition", edition.label, edition.urn, language.green, edition.description
      end
    else
      # Single edition: titled after the work. The description is now printed
      # here as well; it used to be read and then dropped.
      edition = work.edition
      print_corpus_info groupname, "edition", work.title, edition.urn, language.green, edition.description
    end
  end

  # Prints the translation(s) of one work; each translation carries its own
  # language.
  def self.print_work_translations(groupname, work)
    return if work["translation"].nil?

    if work.translation.kind_of?(Array)
      work.translation.each do |translation|
        print_corpus_info groupname, "translation", translation.label, translation.urn, translation["xml:lang"].redish
      end
    else
      translation = work.translation
      print_corpus_info groupname, "translation", work.title, translation.urn, translation["xml:lang"].redish
    end
  end

  private_class_method :print_work_editions, :print_work_translations
end
@@ -0,0 +1,97 @@
1
+ require 'awesome_print'
2
+ require 'perseus/corpus_hash'
3
+ require 'perseus/cts_element'
4
module Perseus
  # Builds the two JSON indexes (by text group and by edition) out of the
  # TextInventory document supplied by the subclass through #to_h.
  class IndexXML < CTSElement
    attr_reader :corpus_by_groupname, :corpus_by_edition

    # Returns the text groups, built on first use and cached afterwards.
    def by_groupname
      @corpus_by_groupname ||= generate_structure_by_group
    end

    # Returns the flat list of editions/translations, built on first use and
    # cached afterwards.
    def by_edition
      @corpus_by_edition ||= generate_structure_by_edition
    end

    # Wraps every text group in a CorpusHash and makes sure its works are held
    # in a list.
    def generate_structure_by_group
      to_h["TextInventory"]["textgroup"].map do |text|
        group = CorpusHash.new(text)
        collected = CorpusHash.new
        group.work.each_with_index do |work, i|
          # Only [key, value] pairs need attention: they show up when the
          # works are iterated as a single hash instead of a list of works.
          next unless work.kind_of?(Array)

          collected[work[0]] = work[1]
          # After the last pair, store the rebuilt work as a one-element list.
          group.work = [collected] if group.work.size - 1 == i
        end
        group
      end
    end

    # Flattens the text groups into one entry per edition and per translation.
    # A work that cannot be processed is reported on stdout and skipped; the
    # entries collected for it before the failure are kept.
    def generate_structure_by_edition
      new_corpus = []
      # Goes through by_groupname so the groups exist even when this is the
      # first method called; the bare reader is nil until then.
      by_groupname.each do |t|
        groupname = t.groupname
        t.work.each do |work|
          begin
            unless work["edition"].nil?
              # Editions take their language from the work.
              one_or_many(work.edition).each do |edition|
                new_corpus.push(index_entry(groupname, work["xml:lang"], edition))
              end
            end
            unless work["translation"].nil?
              # Translations carry their own language.
              one_or_many(work.translation).each do |translation|
                new_corpus.push(index_entry(groupname, translation["xml:lang"], translation))
              end
            end
          rescue StandardError => e
            puts "exception: #{e.message.red}"
            puts "We were working in group: #{groupname.cyan} with the following data point:".green
            puts work.inspect.yellow
          end
        end
      end
      new_corpus
    end

    # Writes both indexes as pretty-printed JSON to their files under DATA_DIR.
    def generate_json_indeces
      puts "Generating index by groupname"
      File.write(Perseus::CTS_BY_GROUP_JSON_FILE, JSON.pretty_generate(by_groupname))
      puts "Generating index by edition"
      File.write(Perseus::ALL_EDITIONS_JSON, JSON.pretty_generate(by_edition))
      puts "DONE".green
    end

    private

    # Returns +node+ itself when it is already a list, else a one-element list.
    def one_or_many(node)
      node.kind_of?(Array) ? node : [node]
    end

    # Builds one flat index entry: the shared fields merged with +source+.
    # NOTE(review): translations are tagged type "edition" too, exactly as
    # before -- confirm whether "translation" was intended.
    def index_entry(groupname, language, source)
      CorpusHash.new({
        groupname: groupname,
        language: language,
        type: "edition",
      }).merge(source)
    end
  end
end