bomdb 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/README.md +8 -6
- data/bomdb.gemspec +6 -5
- data/data/book_of_mormon.db +0 -0
- data/lib/bomdb.rb +0 -2
- data/lib/bomdb/cli/application.rb +9 -7
- data/lib/bomdb/diff/aligner.rb +49 -23
- data/lib/bomdb/import/contents.rb +21 -5
- data/lib/bomdb/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10684c60ce9c451d4c17ab58a0f102cf418d4b18
|
4
|
+
data.tar.gz: 9d383abefdc1e561179b681c0d7edfaa95bc4ea1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96ad2c76b012fcf6919ec86878e27eb5e98fd572c63b71731183bd8b1c07435e3f2bbacb13b99e897203a2d3cff67803fab8f973de98aaac197bb9e5d66890f9
|
7
|
+
data.tar.gz: 2e7bbdac26409d4a1eb316332598abc16df4493d32d7e241e54a59075293743c71b8fba0b3a4597018e808b8b2264e73e54e0092d04bdff6965de11e60b61b78
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bomdb (0.2.
|
4
|
+
bomdb (0.2.3)
|
5
5
|
colorize (~> 0.7)
|
6
6
|
constellation (~> 0.1)
|
7
|
+
levenshtein-ffi (~> 1.1)
|
7
8
|
sequel (~> 4.21)
|
8
9
|
sqlite3 (~> 1.3)
|
9
10
|
text_clean
|
@@ -19,6 +20,9 @@ GEM
|
|
19
20
|
constellation (0.1.1)
|
20
21
|
multi_json
|
21
22
|
diff-lcs (1.2.5)
|
23
|
+
ffi (1.9.3)
|
24
|
+
levenshtein-ffi (1.1.0)
|
25
|
+
ffi (~> 1.9)
|
22
26
|
multi_json (1.11.0)
|
23
27
|
rake (10.4.2)
|
24
28
|
rspec (3.2.0)
|
data/README.md
CHANGED
@@ -104,7 +104,7 @@ Bible-OT (594 refs)
|
|
104
104
|
```
|
105
105
|
|
106
106
|
```bash
|
107
|
-
$bomdb references Bible-NT
|
107
|
+
$ bomdb references Bible-NT
|
108
108
|
1 Corinthians 15:32
|
109
109
|
Luke 12:19
|
110
110
|
```
|
@@ -135,7 +135,13 @@ Note that `align` requires the [dwdiff](http://linux.die.net/man/1/dwdiff) comma
|
|
135
135
|
|
136
136
|
## Installation
|
137
137
|
|
138
|
-
|
138
|
+
Ruby 2.1 is required. You should also have a normal build environment set up, e.g. command line tools on the mac, or GCC on Linux.
|
139
|
+
|
140
|
+
To install BomDB for use on the command line, use `gem install`:
|
141
|
+
|
142
|
+
$ gem install bomdb
|
143
|
+
|
144
|
+
To include bomdb in another Ruby app, add this line to your application's `Gemfile`:
|
139
145
|
|
140
146
|
```ruby
|
141
147
|
source 'https://rubygems.org'
|
@@ -147,10 +153,6 @@ And then execute:
|
|
147
153
|
|
148
154
|
$ bundle
|
149
155
|
|
150
|
-
Or install it yourself as:
|
151
|
-
|
152
|
-
$ gem install bomdb
|
153
|
-
|
154
156
|
## Configuration
|
155
157
|
|
156
158
|
Some settings can be configured in a .bomdb file in your home directory:
|
data/bomdb.gemspec
CHANGED
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
spec.bindir = "bin"
|
21
21
|
|
22
|
-
spec.add_dependency 'sequel',
|
23
|
-
spec.add_dependency 'sqlite3',
|
24
|
-
spec.add_dependency 'thor',
|
25
|
-
spec.add_dependency 'constellation',
|
26
|
-
spec.add_dependency 'colorize',
|
22
|
+
spec.add_dependency 'sequel', '~> 4.21'
|
23
|
+
spec.add_dependency 'sqlite3', '~> 1.3'
|
24
|
+
spec.add_dependency 'thor', '~> 0.19'
|
25
|
+
spec.add_dependency 'constellation', '~> 0.1'
|
26
|
+
spec.add_dependency 'colorize', '~> 0.7'
|
27
27
|
spec.add_dependency 'text_clean'
|
28
|
+
spec.add_dependency 'levenshtein-ffi', '~> 1.1'
|
28
29
|
|
29
30
|
# spec.add_development_dependency 'bundler', '~> 1.7'
|
30
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
data/data/book_of_mormon.db
CHANGED
Binary file
|
data/lib/bomdb.rb
CHANGED
data/lib/bomdb/cli/application.rb
CHANGED
@@ -29,7 +29,12 @@ module BomDB
|
|
29
29
|
exit -1
|
30
30
|
end
|
31
31
|
|
32
|
-
|
32
|
+
begin
|
33
|
+
result = importer.import(read(file), format: format)
|
34
|
+
rescue JSON::ParserError
|
35
|
+
puts "Couldn't parse as JSON. Use '--format=text'?"
|
36
|
+
exit -1
|
37
|
+
end
|
33
38
|
show_result_and_maybe_exit(result)
|
34
39
|
end
|
35
40
|
|
@@ -246,24 +251,21 @@ module BomDB
|
|
246
251
|
end
|
247
252
|
|
248
253
|
dwdiff = Diff::Dwdiff.new(options[:dwdiff])
|
249
|
-
|
254
|
+
align_str = File.read(file).gsub(/\s\s+/, ' ').gsub(':', '~')
|
255
|
+
diff = dwdiff.diff(io.string, align_str)
|
250
256
|
|
251
257
|
if options[:'diff-only']
|
252
258
|
puts diff
|
253
259
|
exit
|
254
260
|
end
|
255
261
|
|
256
|
-
puts Diff::Aligner.parse(diff)
|
262
|
+
puts Diff::Aligner.parse(diff).gsub('~', ':')
|
257
263
|
end
|
258
264
|
|
259
265
|
|
260
266
|
|
261
267
|
private
|
262
268
|
|
263
|
-
def datafile(file)
|
264
|
-
|
265
|
-
end
|
266
|
-
|
267
269
|
def read(file)
|
268
270
|
File.read(relative_or_data_file(file))
|
269
271
|
end
|
data/lib/bomdb/diff/aligner.rb
CHANGED
@@ -1,39 +1,53 @@
|
|
1
1
|
require 'strscan'
|
2
|
+
require 'levenshtein'
|
2
3
|
|
3
4
|
module BomDB
|
4
5
|
module Diff
|
5
6
|
class Aligner
|
6
7
|
DIFF_RE = /\{(\+|\-)(.+?)\1\}/
|
7
8
|
INSERT_RE = /\{\+(.+?)\+\}/
|
9
|
+
WS_INSERT_RE = /\s?\{\+(.+?)\+\}/
|
8
10
|
VERSE_RE = /\[\|([^\]]+)\|\]/
|
9
11
|
|
10
|
-
def self.parse_verse_heading(
|
12
|
+
def self.parse_verse_heading(verse_match, deletion, insertion = nil)
|
11
13
|
# the text of the verse, e.g. "1 Nephi 1:1"
|
12
14
|
verse = verse_match[1]
|
13
|
-
|
14
|
-
# the range of the verse capture, e.g. [2, 17] from ". [|1 Nephi 1:1|]Yea"
|
15
|
-
verse_capture_slice = Range.new(*verse_match.offset(0), true)
|
16
|
-
|
17
|
-
# the deletion without the verse, e.g. ". Yea"
|
18
|
-
deletion_without_verse = deletion.clone
|
19
|
-
deletion_without_verse.slice!(verse_capture_slice)
|
15
|
+
before = after = ''
|
20
16
|
|
21
17
|
# if there's an insertion immediately following...
|
22
|
-
if
|
23
|
-
|
24
|
-
|
18
|
+
if insertion
|
19
|
+
# we can assume split will succeed, because the verse was matched
|
20
|
+
del_before, del_after = deletion.split(verse_match[0], 2)
|
25
21
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
if
|
30
|
-
|
22
|
+
del_before.strip!
|
23
|
+
del_after.strip!
|
24
|
+
|
25
|
+
if del_before.empty? && del_after.empty?
|
26
|
+
# do nothing
|
27
|
+
elsif del_before.empty?
|
28
|
+
# the entire insertion goes after the verse heading
|
29
|
+
after = insertion.chomp
|
30
|
+
elsif del_after.empty?
|
31
|
+
# the entire insertion goes before the verse heading
|
32
|
+
before = insertion.chomp
|
31
33
|
else
|
32
|
-
|
34
|
+
# we have to use some heuristics to figure out where to split
|
35
|
+
# the insertion.
|
36
|
+
|
37
|
+
candidates = (0..(insertion.size-1)).map do |i|
|
38
|
+
d1 = Levenshtein.distance(del_before, insertion[0..i])
|
39
|
+
d2 = Levenshtein.distance(del_after, insertion[(i + 1)..-1])
|
40
|
+
d3 = insertion[i] == ' ' ? 1 : 0
|
41
|
+
[ d1 + d2 + d3, insertion[0..i].chomp, insertion[(i + 1)..-1].chomp ]
|
42
|
+
end.sort_by{ |a| a.first }
|
43
|
+
if candidates.empty?
|
44
|
+
raise "Unable to find candidate split for #{del_before.inspect}, #{del_after.inspect} on #{insertion.inspect}"
|
45
|
+
end
|
46
|
+
|
47
|
+
score, before, after = candidates.first
|
33
48
|
end
|
34
|
-
else
|
35
|
-
"\n" + verse
|
36
49
|
end
|
50
|
+
[before, verse, after]
|
37
51
|
end
|
38
52
|
|
39
53
|
def self.parse(diff_text)
|
@@ -50,10 +64,22 @@ module BomDB
|
|
50
64
|
diff_match = DIFF_RE.match(scanner.matched)
|
51
65
|
case diff_match[1]
|
52
66
|
when '-' then # this is a deletion
|
53
|
-
|
67
|
+
|
68
|
+
delete_inner = diff_match[2] # e.g. ", [|1 Nephi 1:1|] I"
|
69
|
+
# see if there's a verse heading in delete_inner
|
70
|
+
verse_match = VERSE_RE.match(delete_inner)
|
71
|
+
|
54
72
|
# the only deletions we care about are those with verse headings inside them
|
55
|
-
if verse_match
|
56
|
-
|
73
|
+
if verse_match
|
74
|
+
if scanner.scan(WS_INSERT_RE)
|
75
|
+
ws_insert_match = WS_INSERT_RE.match(scanner.matched)
|
76
|
+
insert_inner = ws_insert_match[1]
|
77
|
+
else
|
78
|
+
insert_inner = nil
|
79
|
+
end
|
80
|
+
before, verse, after = parse_verse_heading(verse_match, delete_inner, insert_inner)
|
81
|
+
output << before + "\n" + verse
|
82
|
+
output << " " + after
|
57
83
|
last_pos = scanner.pos
|
58
84
|
end
|
59
85
|
when '+' then # this is an insertion
|
@@ -65,7 +91,7 @@ module BomDB
|
|
65
91
|
end
|
66
92
|
end
|
67
93
|
|
68
|
-
return output
|
94
|
+
return output.gsub(/ +/, ' ').gsub(/ +$/, '')
|
69
95
|
end
|
70
96
|
end
|
71
97
|
end
|
data/lib/bomdb/import/contents.rb
CHANGED
@@ -7,6 +7,7 @@ module BomDB
|
|
7
7
|
tables :books, :verses, :editions, :contents
|
8
8
|
DEFAULT_VERSE_CONTENT_RE = /^\s*(.+)\s+(\d+):(\d+)\s+(.*)$/
|
9
9
|
DEFAULT_VERSE_REF_RE = /^\s*(.+)\s+(\d+):(\d+)$/
|
10
|
+
MAX_DUPS = 5
|
10
11
|
|
11
12
|
def import_text(data)
|
12
13
|
if opts[:edition_prefix].nil?
|
@@ -23,9 +24,11 @@ module BomDB
|
|
23
24
|
error: "Edition matching prefix '#{opts[:edition_prefix]}' not found"
|
24
25
|
)
|
25
26
|
end
|
27
|
+
edition_id = edition[:edition_id]
|
26
28
|
|
27
29
|
verse_re = opts[:verse_re] || DEFAULT_VERSE_CONTENT_RE
|
28
30
|
|
31
|
+
times_tried = 0
|
29
32
|
data.each_line do |line|
|
30
33
|
if line =~ verse_re
|
31
34
|
book_name, chapter, verse, content = $1, $2, $3, $4
|
@@ -39,11 +42,24 @@ module BomDB
|
|
39
42
|
book_id: book[:book_id]
|
40
43
|
)
|
41
44
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
begin
|
46
|
+
@db[:contents].insert(
|
47
|
+
edition_id: edition_id,
|
48
|
+
verse_id: verse_id,
|
49
|
+
content_body: content
|
50
|
+
)
|
51
|
+
rescue Sequel::UniqueConstraintViolation => e
|
52
|
+
msg = "edition_id: #{edition_id}, verse: '#{book_name} #{chapter}:#{verse}', content: #{content.inspect}"
|
53
|
+
$stderr.puts "Warning: duplicate #{msg}"
|
54
|
+
times_tried += 1
|
55
|
+
if times_tried > MAX_DUPS
|
56
|
+
return Import::Result.new(success: false,
|
57
|
+
error: "Too many duplicate rows. Stopped at #{msg}"
|
58
|
+
)
|
59
|
+
else
|
60
|
+
next
|
61
|
+
end
|
62
|
+
end
|
47
63
|
end
|
48
64
|
end
|
49
65
|
Import::Result.new(success: true)
|
data/lib/bomdb/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bomdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Duane Johnson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: levenshtein-ffi
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.1'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.1'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: rake
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|