bomdb 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,72 @@
1
+ require 'strscan'
2
+
3
+ module BomDB
4
+ module Diff
5
# Rewrites inline word-diff markup into plain text.
#
# Recognised markup:
#   {+text+}  an insertion; its text is kept
#   {-text-}  a deletion; its text is dropped, unless it contains a verse
#             heading of the form [|Book C:V|], in which case the heading
#             text is emitted on a new line
class Aligner
  DIFF_RE = /\{(\+|\-)(.+?)\1\}/
  INSERT_RE = /\{\+(.+?)\+\}/
  VERSE_RE = /\[\|([^\]]+)\|\]/

  # Builds the replacement text for a deletion that contains a verse heading.
  #
  # If an insertion immediately follows the deletion in the scanner, it is
  # consumed and the heading is spliced into it at the offset the heading had
  # inside the deletion.
  #
  # @param scanner [StringScanner] positioned just after the deletion
  # @param deletion [String] inner text of the deletion (kept for interface
  #   compatibility; the offsets needed come from +verse_match+)
  # @param verse_match [MatchData] match of VERSE_RE against the deletion
  # @return [String] text to append to the output
  def self.parse_verse_heading(scanner, deletion, verse_match)
    # the text of the verse, e.g. "1 Nephi 1:1", placed on its own line
    heading = "\n" + verse_match[1]

    # no insertion immediately following: the heading stands alone
    return heading unless scanner.scan(INSERT_RE)

    insertion = scanner[1]
    # offset of the heading inside the deletion, e.g. 2 for ". [|1 Nephi 1:1|]Yea"
    insert_pos = verse_match.begin(0)
    prefix = insertion[0...insert_pos]

    # insertion too short to have anything after the splice point
    return prefix + heading unless insertion.size > insert_pos

    # The tail deliberately restarts one character before the splice point, so
    # that character (typically a space) also separates the heading from the
    # text after it. Clamp at 0: when the heading opens the deletion, a start
    # index of -1 would wrap around and keep only the insertion's last character.
    tail_start = [insert_pos - 1, 0].max
    prefix + heading + insertion[tail_start..-1]
  end

  # Converts diff markup into plain text (see the class comment for the rules).
  #
  # @param diff_text [String] text containing {+...+} / {-...-} markup
  # @return [String] the text with markup resolved
  def self.parse(diff_text)
    scanner = StringScanner.new(diff_text)
    output = ""

    # StringScanner#pre_match is measured from the start of the string, so
    # remember where the previously consumed region ended.
    last_pos = 0
    until scanner.eos?
      unless scanner.scan_until(DIFF_RE)
        # no more markup: copy the remainder verbatim
        output << scanner.rest
        break
      end

      output << scanner.pre_match[last_pos..-1]
      last_pos = scanner.pos

      # read the captures now; parse_verse_heading may scan again and reset them
      marker = scanner[1]
      inner = scanner[2]

      if marker == '+'
        # this is an insertion
        output << inner
      elsif (verse_match = VERSE_RE.match(inner))
        # the only deletions we care about are those with verse headings inside them
        output << parse_verse_heading(scanner, inner, verse_match)
        last_pos = scanner.pos
      end
    end

    output
  end
end
71
+ end
72
+ end
@@ -0,0 +1,28 @@
1
+ require 'tmpdir'
2
+
3
+ module BomDB
4
+ module Diff
5
# This wraps the command-line tool, dwdiff
# See http://linux.die.net/man/1/dwdiff
class Dwdiff
  # @param bin [String] path (or name) of the dwdiff executable
  def initialize(bin = '/usr/local/bin/dwdiff')
    @bin = bin
  end

  # Writes both strings to temporary files, runs dwdiff on them and returns
  # whatever the tool printed on standard output.
  #
  # @param str1 [String] the "old" text
  # @param str2 [String] the "new" text
  # @return [String] dwdiff output using {-...-} / {+...+} markers
  def diff(str1, str2)
    Dir.mktmpdir("bomdb") do |dir|
      file1 = File.join(dir, "file1.txt")
      file2 = File.join(dir, "file2.txt")
      File.write(file1, str1)
      File.write(file2, str2)

      # Pass an argument vector rather than a shell string, so temp paths that
      # contain spaces or shell metacharacters cannot break the command.
      command = [
        @bin,
        '-w{-', # -w : start-delete marker, {-
        '-x-}', # -x : end-delete marker, -}
        '-y{+', # -y : start-insert marker, {+
        '-z+}', # -z : end-insert marker, +}
        '-P',   # -P : use punctuation characters as delimiters
        file1,
        file2
      ]
      IO.popen(command, &:read)
    end
  end
end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ module BomDB
2
+ module Export
3
# Common behaviour for exporters: holds the database handle and options, and
# dispatches #export to the format-specific method a subclass provides.
class Base
  attr_reader :db, :opts

  # @param db [Object] database handle used by subclasses
  # @param opts [Hash] exporter-specific options
  def initialize(db, **opts)
    @db = db
    @opts = opts
  end

  # Runs the export in the requested format.
  #
  # @param format [String] 'json' or 'text'
  # @param options [Hash] accepted for interface compatibility; unused here
  # @return [Export::Result] the result of export_json / export_text, or a
  #   failed result when the format is not recognised
  def export(format: 'json', **options)
    case format
    when 'json' then export_json
    when 'text' then export_text
    else
      # An exporter reports failures with Export::Result, not Import::Result.
      Export::Result.new(
        success: false,
        error: "Unknown format: #{format}"
      )
    end
  end
end
23
+ end
24
+ end
@@ -0,0 +1,15 @@
1
+ require 'json'
2
+
3
+ module BomDB
4
+ module Export
5
# Exports the rows of the books table.
class Books < Export::Base
  # Serialises every book as a JSON array of [name, group, sort], one book per
  # line, wrapped in an outer JSON array.
  #
  # @return [Export::Result] successful result whose body is the JSON text
  def export_json
    rows = @db[:books].map do |book|
      triple = [ book[:book_name], book[:book_group], book[:book_sort] ]
      JSON::generate(triple, array_nl: ' ')
    end
    body = "[\n " + rows.join(",\n ") + "\n]\n"
    Export::Result.new(success: true, body: body)
  end
end
14
+ end
15
+ end
@@ -0,0 +1,100 @@
1
+ require 'json'
2
+ require 'bomdb/models/edition'
3
+
4
+ module BomDB
5
+ module Export
6
# Exports verse contents for the selected editions, as JSON or plain text.
#
# Editions are chosen through opts[:edition_prefixes]: either the symbol :all
# or a collection of values handed to Models::Edition#find.
class Contents < Export::Base
  # Builds a document with an edition legend plus the contents nested as
  # book name => chapter => "Book C:V" => edition year => content body.
  #
  # @return [Export::Result] successful result whose body is pretty-printed JSON
  def export_json
    editions_by_id = selected_editions

    contents = {}
    content_query(editions_by_id.keys).each do |row|
      edition = editions_by_id[row[:edition_id]]
      chapters = (contents[row[:book_name]] ||= {})
      verses = (chapters[row[:verse_chapter]] ||= {})
      by_year = (verses[full_verse_ref(row)] ||= {})
      by_year[edition[:edition_year]] = row[:content_body]
    end

    document = { editions: editions_legend(editions_by_id), contents: contents }
    Export::Result.new(success: true, body: JSON.pretty_generate(document))
  end

  # Emits, per edition, an underlined title followed by one
  # "Book C:V<TAB>content" line per verse and a blank line.
  #
  # @return [Export::Result] successful result whose body is the text
  def export_text
    output = ""

    selected_editions.each do |id, edition|
      title = edition[:edition_name]
      output << title + "\n" << ('=' * title.size) + "\n"

      content_query([id]).each do |row|
        output << full_verse_ref(row) + "\t" + row[:content_body] + "\n"
      end

      output << "\n"
    end

    Export::Result.new(success: true, body: output)
  end

  protected

  # Maps each edition year to a { year:, name: } entry.
  def editions_legend(editions_by_id)
    editions_by_id.each_with_object({}) do |(_id, row), legend|
      year = row[:edition_year]
      legend[year] = { year: year, name: row[:edition_name] }
    end
  end

  # Returns the editions to export, keyed by edition id.
  def selected_editions
    editions_by_id = {}

    if opts[:edition_prefixes] == :all
      # "all" means all editions that actually have content
      edition_query.each { |e| editions_by_id[e[:edition_id]] = e }
    else
      # export editions that are mentioned by name-prefix
      # NOTE(review): assumes Models::Edition#find never returns nil - confirm
      finder = Models::Edition.new(@db)
      opts[:edition_prefixes].each do |prefix|
        found = finder.find(prefix)
        editions_by_id[found[:edition_id]] = found
      end
    end

    editions_by_id
  end

  # Editions having at least one contents row, ordered by name.
  def edition_query
    @db[:editions]
      .left_outer_join(:contents, :edition_id => :edition_id)
      .select_group(:editions__edition_id, :edition_year, :edition_name)
      .select_append{ Sequel.as(count(:verse_id), :count) }
      .having{ count > 0 }
      .order(:edition_name)
  end

  # Content rows for the given edition ids; rows with a verse heading are excluded.
  def content_query(edition_ids)
    @db[:verses]
      .join(:books, :book_id => :book_id)
      .join(:editions)
      .join(:contents, :edition_id => :edition_id, :verse_id => :verses__verse_id)
      .order(:book_sort, :verse_heading, :verse_chapter, :verse_number)
      .select(:editions__edition_id, :book_name, :verse_chapter, :verse_number, :content_body)
      .where(:editions__edition_id => edition_ids)
      .where(:verse_heading => nil)
  end

  # Formats a row as "Book Chapter:Verse".
  def full_verse_ref(row)
    "#{row[:book_name]} #{row[:verse_chapter]}:#{row[:verse_number]}"
  end
end
99
+ end
100
+ end
@@ -0,0 +1,15 @@
1
+ require 'json'
2
+
3
+ module BomDB
4
+ module Export
5
# Exports the rows of the editions table.
class Editions < Export::Base
  # Serialises every edition as a JSON array of [year, name], ordered by year
  # then name, one edition per line, wrapped in an outer JSON array.
  #
  # @return [Export::Result] successful result whose body is the JSON text
  def export_json
    ordered = @db[:editions].order(:edition_year, :edition_name)
    rows = ordered.map do |edition|
      JSON::generate([edition[:edition_year], edition[:edition_name]], array_nl: ' ')
    end
    body = "[\n " + rows.join(",\n ") + "\n]\n"
    Export::Result.new(success: true, body: body)
  end
end
14
+ end
15
+ end
@@ -0,0 +1,29 @@
1
+ module BomDB
2
+ module Export
3
# Outcome of an export: a success flag plus either the exported body or an error.
class Result
  attr_reader :success, :error, :body

  # @param success [Boolean] whether the export succeeded
  # @param body [Object, nil] the exported payload, when there is one
  # @param error [Object, nil] description of the failure, when there is one
  def initialize(success:, body: nil, error: nil)
    @success = success
    @error = error
    @body = body
  end

  # @return [Boolean] the success flag given at construction
  def success?
    @success
  end

  # @return [String] the body as a string; empty when there is no body
  #   (to_s must always return a String, so a nil body is not passed through)
  def to_s
    @body.to_s
  end

  # @return [String] "Succeeded", or the error rendered as a string
  def message
    @success ? "Succeeded" : @error.to_s
  end
end
28
+ end
29
+ end
@@ -0,0 +1,20 @@
1
+ require 'json'
2
+
3
+ module BomDB
4
+ module Export
5
# Exports, for each book and chapter, the highest verse number present.
class Verses < Export::Base
  # Produces a JSON array of { book, chapter, verses } objects ordered by book
  # sort key and chapter; rows with a verse heading are excluded.
  #
  # @return [Export::Result] successful result whose body is pretty-printed JSON
  def export_json
    per_chapter = @db[:verses]
      .join(:books, :book_id => :book_id)
      .where(:verse_heading => nil)
      .order(:book_sort, :verse_chapter)
      .select_group(:book_name, :verse_chapter)
      .select_append{ Sequel.as(max(:verse_number), :count) }

    verses = per_chapter.map do |row|
      { book: row[:book_name], chapter: row[:verse_chapter], verses: row[:count] }
    end

    Export::Result.new(success: true, body: JSON.pretty_generate(verses))
  end
end
19
+ end
20
+ end
@@ -3,8 +3,38 @@ require 'json'
3
3
  module BomDB
4
4
  module Import
5
5
  class Base
6
- def initialize(db)
6
+ attr_reader :db, :opts
7
+
8
# Stores the database handle and any importer options.
#
# @param db [Object] database handle
# @param opts [Hash] importer-specific options
def initialize(db, **opts)
  @db, @opts = db, opts
end
12
+
13
# Class-level declaration of the tables an importer needs; the names are
# stored in the @tables instance variable of the receiver.
#
# @param tables [Array<Symbol>] table names
# @return [Array<Symbol>] the stored names
def self.tables(*tables)
  instance_variable_set(:@tables, tables)
end
16
+
17
# Returns the table names declared on this object's own class via the
# class-level `tables` macro (nil when that class declared none).
def tables
  self.class.instance_variable_get(:@tables)
end
20
+
21
# Imports +data+ in the given format after checking that the tables this
# importer declared are present.
#
# @param data [Object] raw input; for 'json' it is first passed through
#   ensure_parsed_json
# @param format [String] 'json' or 'text'
# @return [Object] the value of import_json / import_text, or a failed
#   Import::Result when tables are missing or the format is unknown
def import(data, format: 'json')
  unless schema.has_tables?(tables)
    missing = "Database table(s) not present: [#{tables.join(', ')}]"
    return Import::Result.new(success: false, error: missing)
  end

  if format == 'json'
    import_json(ensure_parsed_json(data))
  elsif format == 'text'
    import_text(data)
  else
    Import::Result.new(success: false, error: "Unknown format: #{format}")
  end
end
9
39
 
10
40
  def ensure_parsed_json(data)
@@ -3,24 +3,15 @@ require 'json'
3
3
  module BomDB
4
4
  module Import
5
5
  class Books < Import::Base
6
-
7
- def reset
8
- schema.reset(:books)
9
- end
6
+ tables :books
10
7
 
11
8
  # Expected data format is:
12
9
  # [
13
10
  # [book_name:String, book_group:String, book_sort:Integer],
14
11
  # ...
15
12
  # ]
16
- def json(data)
17
- if !schema.has_tables?(:books)
18
- return Import::Result.new(
19
- success: false,
20
- error: "Database table 'books' not present."
21
- )
22
- end
23
- ensure_parsed_json(data).each do |name, group, sort|
13
+ def import_json(data)
14
+ data.each do |name, group, sort|
24
15
  @db[:books].insert(
25
16
  book_name: name,
26
17
  book_group: group,
@@ -0,0 +1,137 @@
1
+ require 'bomdb/models/verse'
2
+ require 'bomdb/models/edition'
3
+
4
+ module BomDB
5
+ module Import
6
# Imports verse contents, either from plain text (one verse per line) or
# from a JSON document of editions and nested contents.
class Contents < Import::Base
  tables :books, :verses, :editions, :contents

  # Matches a text line such as "1 Nephi 12:3 And it came to pass".
  # Captures: book name, chapter, verse number, content.
  # The book capture is lazy: a greedy capture swallows the separator and the
  # leading digits of the chapter ("Alma 32:21" => book "Alma 3", chapter "2")
  # and locks onto the last "n:n" on the line instead of the first.
  DEFAULT_VERSE_CONTENT_RE = /^\s*(.+?)\s*(\d+):(\d+)\s*(.*)$/

  # Matches a full verse reference such as "1 Nephi 12:3".
  DEFAULT_VERSE_REF_RE = /^([^:]+)\s+(\d+):(\d+)$/

  # Imports one verse per matching line for the edition in opts[:edition_id].
  # Lines that do not match the verse pattern are ignored.
  #
  # @param data [String] text to import
  # @return [Import::Result] success, or the failure produced when a book
  #   name cannot be found
  # @raise [ArgumentError] when opts[:edition_id] is nil
  def import_text(data)
    if opts[:edition_id].nil?
      raise ArgumentError, "Edition is required for text import of contents"
    end

    # opts[:verse_re] may override the pattern; it must provide the same four captures
    verse_re = opts[:verse_re] || DEFAULT_VERSE_CONTENT_RE
    verse_model = Models::Verse.new(@db)

    data.each_line do |line|
      match = verse_re.match(line)
      next unless match

      book_name, chapter, verse, content = match.captures

      book = find_book(book_name)
      return book if book.is_a?(Import::Result)

      verse_id = verse_model.find_or_create(
        chapter: chapter,
        verse: verse,
        book_id: book[:book_id]
      )

      @db[:contents].insert(
        edition_id: opts[:edition_id],
        verse_id: verse_id,
        content_body: content
      )
    end

    # Report success explicitly, as import_json does, instead of leaking the
    # return value of each_line.
    Import::Result.new(success: true)
  end

  # Imports a parsed JSON document shaped as:
  #   { "editions" => { id => { "year" => ..., "name" => ... } },
  #     "contents" => { book => { chapter => { "Book C:V" => { id => body } } } } }
  #
  # Verse references that cannot be parsed are reported on stderr and skipped.
  #
  # @param data [Hash] parsed JSON
  # @return [Import::Result] success, or failure when a verse cannot be found
  def import_json(data)
    # this cross-ref is for looking up file-edition-id => database-edition-id
    editions_xref = {}

    ed_model = Models::Edition.new(@db)
    verse_model = Models::Verse.new(@db)

    data['editions'].each_pair do |id, e|
      editions_xref[ id ] = ed_model.find_or_create(e["year"].to_i, e["name"])
    end

    data['contents'].each_pair do |book_name, chapters|
      chapters.each_pair do |chapter, verses|
        verses.each_pair do |verse_full_ref, editions|
          match = DEFAULT_VERSE_REF_RE.match(verse_full_ref)
          unless match
            $stderr.puts "Unable to parse verse ref from '#{verse_full_ref}', skipping"
            next
          end

          verse_number = match[3].to_i
          verse = verse_model.find(
            book_name: book_name,
            chapter: chapter,
            verse: verse_number
          )
          if verse.nil?
            return Import::Result.new(success: false,
              error: "Unable to find verse: book: " +
                     "'#{book_name}', chapter: '#{chapter}', " +
                     "verse: '#{verse_number}'")
          end

          editions.each_pair do |file_edition_id, content_body|
            @db[:contents].insert(
              edition_id: editions_xref[ file_edition_id ],
              verse_id: verse[:verse_id],
              content_body: content_body
            )
          end
        end
      end
    end
    Import::Result.new(success: true)
  end

  # Importer for an earlier JSON layout:
  #   { book => [ { year => { "meta" => {...}, "content" => ... } }, ... ] }
  #
  # @param data [Hash] parsed JSON in the old layout
  # @return [Import::Result] success, or failure when a book is missing or a
  #   unique constraint is violated
  def import_json_old(data)
    data.each_pair do |book_name, year_editions|
      year_editions.each do |year_edition|
        year_edition.each_pair do |year, d|
          m = d["meta"]

          book = find_book(book_name)
          return book if book.is_a?(Import::Result)

          verse_id = Models::Verse.new(@db).find_or_create(
            chapter: m['chapter'],
            verse: m['verse'],
            book_id: book[:book_id],
            heading: m['heading']
          )

          ed_id = opts[:edition_id] || find_or_create_edition(year)

          @db[:contents].insert(
            edition_id: ed_id,
            verse_id: verse_id,
            content_body: d["content"]
          )
        end
      end
    end
    Import::Result.new(success: true)
  rescue Sequel::UniqueConstraintViolation => e
    Import::Result.new(success: false, error: e)
  end

  protected

  # Looks a book up by exact name.
  #
  # @return [Hash, Import::Result] the book row, or a failed result when absent
  def find_book(book_name)
    book = @db[:books].where(:book_name => book_name).first
    if book.nil?
      Import::Result.new(success: false, error: "Unable to find book '#{book_name}'")
    else
      book
    end
  end

  # Returns the id of the first edition with the given year, inserting a new
  # edition (named after the year unless a name is given) when none exists.
  def find_or_create_edition(year, name = nil)
    name ||= year.to_s
    edition = @db[:editions].where(:edition_year => year).first
    (edition && edition[:edition_id]) || @db[:editions].insert(
      edition_name: name,
      edition_year: year
    )
  end
end
136
+ end
137
+ end