bomdb 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/README.md +8 -6
- data/bomdb.gemspec +6 -5
- data/data/book_of_mormon.db +0 -0
- data/lib/bomdb.rb +0 -2
- data/lib/bomdb/cli/application.rb +9 -7
- data/lib/bomdb/diff/aligner.rb +49 -23
- data/lib/bomdb/import/contents.rb +21 -5
- data/lib/bomdb/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10684c60ce9c451d4c17ab58a0f102cf418d4b18
|
4
|
+
data.tar.gz: 9d383abefdc1e561179b681c0d7edfaa95bc4ea1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 96ad2c76b012fcf6919ec86878e27eb5e98fd572c63b71731183bd8b1c07435e3f2bbacb13b99e897203a2d3cff67803fab8f973de98aaac197bb9e5d66890f9
|
7
|
+
data.tar.gz: 2e7bbdac26409d4a1eb316332598abc16df4493d32d7e241e54a59075293743c71b8fba0b3a4597018e808b8b2264e73e54e0092d04bdff6965de11e60b61b78
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
bomdb (0.2.
|
4
|
+
bomdb (0.2.3)
|
5
5
|
colorize (~> 0.7)
|
6
6
|
constellation (~> 0.1)
|
7
|
+
levenshtein-ffi (~> 1.1)
|
7
8
|
sequel (~> 4.21)
|
8
9
|
sqlite3 (~> 1.3)
|
9
10
|
text_clean
|
@@ -19,6 +20,9 @@ GEM
|
|
19
20
|
constellation (0.1.1)
|
20
21
|
multi_json
|
21
22
|
diff-lcs (1.2.5)
|
23
|
+
ffi (1.9.3)
|
24
|
+
levenshtein-ffi (1.1.0)
|
25
|
+
ffi (~> 1.9)
|
22
26
|
multi_json (1.11.0)
|
23
27
|
rake (10.4.2)
|
24
28
|
rspec (3.2.0)
|
data/README.md
CHANGED
@@ -104,7 +104,7 @@ Bible-OT (594 refs)
|
|
104
104
|
```
|
105
105
|
|
106
106
|
```bash
|
107
|
-
$bomdb references Bible-NT
|
107
|
+
$ bomdb references Bible-NT
|
108
108
|
1 Corinthians 15:32
|
109
109
|
Luke 12:19
|
110
110
|
```
|
@@ -135,7 +135,13 @@ Note that `align` requires the [dwdiff](http://linux.die.net/man/1/dwdiff) comma
|
|
135
135
|
|
136
136
|
## Installation
|
137
137
|
|
138
|
-
|
138
|
+
Ruby 2.1 is required. You should also have a normal build environment set up, e.g. the command line tools on the Mac, or GCC on Linux.
|
139
|
+
|
140
|
+
To install BomDB for use on the command line, use `gem install`:
|
141
|
+
|
142
|
+
$ gem install bomdb
|
143
|
+
|
144
|
+
To include bomdb in another Ruby app, add this line to your application's `Gemfile`:
|
139
145
|
|
140
146
|
```ruby
|
141
147
|
source 'https://rubygems.org'
|
@@ -147,10 +153,6 @@ And then execute:
|
|
147
153
|
|
148
154
|
$ bundle
|
149
155
|
|
150
|
-
Or install it yourself as:
|
151
|
-
|
152
|
-
$ gem install bomdb
|
153
|
-
|
154
156
|
## Configuration
|
155
157
|
|
156
158
|
Some settings can be configured in a .bomdb file in your home directory:
|
data/bomdb.gemspec
CHANGED
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
spec.bindir = "bin"
|
21
21
|
|
22
|
-
spec.add_dependency 'sequel',
|
23
|
-
spec.add_dependency 'sqlite3',
|
24
|
-
spec.add_dependency 'thor',
|
25
|
-
spec.add_dependency 'constellation',
|
26
|
-
spec.add_dependency 'colorize',
|
22
|
+
spec.add_dependency 'sequel', '~> 4.21'
|
23
|
+
spec.add_dependency 'sqlite3', '~> 1.3'
|
24
|
+
spec.add_dependency 'thor', '~> 0.19'
|
25
|
+
spec.add_dependency 'constellation', '~> 0.1'
|
26
|
+
spec.add_dependency 'colorize', '~> 0.7'
|
27
27
|
spec.add_dependency 'text_clean'
|
28
|
+
spec.add_dependency 'levenshtein-ffi', '~> 1.1'
|
28
29
|
|
29
30
|
# spec.add_development_dependency 'bundler', '~> 1.7'
|
30
31
|
spec.add_development_dependency 'rake', '~> 10.0'
|
data/data/book_of_mormon.db
CHANGED
Binary file
|
data/lib/bomdb.rb
CHANGED
@@ -29,7 +29,12 @@ module BomDB
|
|
29
29
|
exit -1
|
30
30
|
end
|
31
31
|
|
32
|
-
|
32
|
+
begin
|
33
|
+
result = importer.import(read(file), format: format)
|
34
|
+
rescue JSON::ParserError
|
35
|
+
puts "Couldn't parse as JSON. Use '--format=text'?"
|
36
|
+
exit -1
|
37
|
+
end
|
33
38
|
show_result_and_maybe_exit(result)
|
34
39
|
end
|
35
40
|
|
@@ -246,24 +251,21 @@ module BomDB
|
|
246
251
|
end
|
247
252
|
|
248
253
|
dwdiff = Diff::Dwdiff.new(options[:dwdiff])
|
249
|
-
|
254
|
+
align_str = File.read(file).gsub(/\s\s+/, ' ').gsub(':', '~')
|
255
|
+
diff = dwdiff.diff(io.string, align_str)
|
250
256
|
|
251
257
|
if options[:'diff-only']
|
252
258
|
puts diff
|
253
259
|
exit
|
254
260
|
end
|
255
261
|
|
256
|
-
puts Diff::Aligner.parse(diff)
|
262
|
+
puts Diff::Aligner.parse(diff).gsub('~', ':')
|
257
263
|
end
|
258
264
|
|
259
265
|
|
260
266
|
|
261
267
|
private
|
262
268
|
|
263
|
-
def datafile(file)
|
264
|
-
|
265
|
-
end
|
266
|
-
|
267
269
|
def read(file)
|
268
270
|
File.read(relative_or_data_file(file))
|
269
271
|
end
|
data/lib/bomdb/diff/aligner.rb
CHANGED
@@ -1,39 +1,53 @@
|
|
1
1
|
require 'strscan'
|
2
|
+
require 'levenshtein'
|
2
3
|
|
3
4
|
module BomDB
|
4
5
|
module Diff
|
5
6
|
class Aligner
|
6
7
|
DIFF_RE = /\{(\+|\-)(.+?)\1\}/
|
7
8
|
INSERT_RE = /\{\+(.+?)\+\}/
|
9
|
+
WS_INSERT_RE = /\s?\{\+(.+?)\+\}/
|
8
10
|
VERSE_RE = /\[\|([^\]]+)\|\]/
|
9
11
|
|
10
|
-
def self.parse_verse_heading(
|
12
|
+
def self.parse_verse_heading(verse_match, deletion, insertion = nil)
|
11
13
|
# the text of the verse, e.g. "1 Nephi 1:1"
|
12
14
|
verse = verse_match[1]
|
13
|
-
|
14
|
-
# the range of the verse capture, e.g. [2, 17] from ". [|1 Nephi 1:1|]Yea"
|
15
|
-
verse_capture_slice = Range.new(*verse_match.offset(0), true)
|
16
|
-
|
17
|
-
# the deletion without the verse, e.g. ". Yea"
|
18
|
-
deletion_without_verse = deletion.clone
|
19
|
-
deletion_without_verse.slice!(verse_capture_slice)
|
15
|
+
before = after = ''
|
20
16
|
|
21
17
|
# if there's an insertion immediately following...
|
22
|
-
if
|
23
|
-
|
24
|
-
|
18
|
+
if insertion
|
19
|
+
# we can assume split will succeed, because the verse was matched
|
20
|
+
del_before, del_after = deletion.split(verse_match[0], 2)
|
25
21
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
if
|
30
|
-
|
22
|
+
del_before.strip!
|
23
|
+
del_after.strip!
|
24
|
+
|
25
|
+
if del_before.empty? && del_after.empty?
|
26
|
+
# do nothing
|
27
|
+
elsif del_before.empty?
|
28
|
+
# the entire insertion goes after the verse heading
|
29
|
+
after = insertion.chomp
|
30
|
+
elsif del_after.empty?
|
31
|
+
# the entire insertion goes before the verse heading
|
32
|
+
before = insertion.chomp
|
31
33
|
else
|
32
|
-
|
34
|
+
# we have to use some heuristics to figure out where to split
|
35
|
+
# the insertion.
|
36
|
+
|
37
|
+
candidates = (0..(insertion.size-1)).map do |i|
|
38
|
+
d1 = Levenshtein.distance(del_before, insertion[0..i])
|
39
|
+
d2 = Levenshtein.distance(del_after, insertion[(i + 1)..-1])
|
40
|
+
d3 = insertion[i] == ' ' ? 1 : 0
|
41
|
+
[ d1 + d2 + d3, insertion[0..i].chomp, insertion[(i + 1)..-1].chomp ]
|
42
|
+
end.sort_by{ |a| a.first }
|
43
|
+
if candidates.empty?
|
44
|
+
raise "Unable to find candidate split for #{del_before.inspect}, #{del_after.inspect} on #{insertion.inspect}"
|
45
|
+
end
|
46
|
+
|
47
|
+
score, before, after = candidates.first
|
33
48
|
end
|
34
|
-
else
|
35
|
-
"\n" + verse
|
36
49
|
end
|
50
|
+
[before, verse, after]
|
37
51
|
end
|
38
52
|
|
39
53
|
def self.parse(diff_text)
|
@@ -50,10 +64,22 @@ module BomDB
|
|
50
64
|
diff_match = DIFF_RE.match(scanner.matched)
|
51
65
|
case diff_match[1]
|
52
66
|
when '-' then # this is a deletion
|
53
|
-
|
67
|
+
|
68
|
+
delete_inner = diff_match[2] # e.g. ", [|1 Nephi 1:1|] I"
|
69
|
+
# see if there's a verse heading in delete_inner
|
70
|
+
verse_match = VERSE_RE.match(delete_inner)
|
71
|
+
|
54
72
|
# the only deletions we care about are those with verse headings inside them
|
55
|
-
if verse_match
|
56
|
-
|
73
|
+
if verse_match
|
74
|
+
if scanner.scan(WS_INSERT_RE)
|
75
|
+
ws_insert_match = WS_INSERT_RE.match(scanner.matched)
|
76
|
+
insert_inner = ws_insert_match[1]
|
77
|
+
else
|
78
|
+
insert_inner = nil
|
79
|
+
end
|
80
|
+
before, verse, after = parse_verse_heading(verse_match, delete_inner, insert_inner)
|
81
|
+
output << before + "\n" + verse
|
82
|
+
output << " " + after
|
57
83
|
last_pos = scanner.pos
|
58
84
|
end
|
59
85
|
when '+' then # this is an insertion
|
@@ -65,7 +91,7 @@ module BomDB
|
|
65
91
|
end
|
66
92
|
end
|
67
93
|
|
68
|
-
return output
|
94
|
+
return output.gsub(/ +/, ' ').gsub(/ +$/, '')
|
69
95
|
end
|
70
96
|
end
|
71
97
|
end
|
@@ -7,6 +7,7 @@ module BomDB
|
|
7
7
|
tables :books, :verses, :editions, :contents
|
8
8
|
DEFAULT_VERSE_CONTENT_RE = /^\s*(.+)\s+(\d+):(\d+)\s+(.*)$/
|
9
9
|
DEFAULT_VERSE_REF_RE = /^\s*(.+)\s+(\d+):(\d+)$/
|
10
|
+
MAX_DUPS = 5
|
10
11
|
|
11
12
|
def import_text(data)
|
12
13
|
if opts[:edition_prefix].nil?
|
@@ -23,9 +24,11 @@ module BomDB
|
|
23
24
|
error: "Edition matching prefix '#{opts[:edition_prefix]}' not found"
|
24
25
|
)
|
25
26
|
end
|
27
|
+
edition_id = edition[:edition_id]
|
26
28
|
|
27
29
|
verse_re = opts[:verse_re] || DEFAULT_VERSE_CONTENT_RE
|
28
30
|
|
31
|
+
times_tried = 0
|
29
32
|
data.each_line do |line|
|
30
33
|
if line =~ verse_re
|
31
34
|
book_name, chapter, verse, content = $1, $2, $3, $4
|
@@ -39,11 +42,24 @@ module BomDB
|
|
39
42
|
book_id: book[:book_id]
|
40
43
|
)
|
41
44
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
begin
|
46
|
+
@db[:contents].insert(
|
47
|
+
edition_id: edition_id,
|
48
|
+
verse_id: verse_id,
|
49
|
+
content_body: content
|
50
|
+
)
|
51
|
+
rescue Sequel::UniqueConstraintViolation => e
|
52
|
+
msg = "edition_id: #{edition_id}, verse: '#{book_name} #{chapter}:#{verse}', content: #{content.inspect}"
|
53
|
+
$stderr.puts "Warning: duplicate #{msg}"
|
54
|
+
times_tried += 1
|
55
|
+
if times_tried > MAX_DUPS
|
56
|
+
return Import::Result.new(success: false,
|
57
|
+
error: "Too many duplicate rows. Stopped at #{msg}"
|
58
|
+
)
|
59
|
+
else
|
60
|
+
next
|
61
|
+
end
|
62
|
+
end
|
47
63
|
end
|
48
64
|
end
|
49
65
|
Import::Result.new(success: true)
|
data/lib/bomdb/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bomdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Duane Johnson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: levenshtein-ffi
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.1'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.1'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: rake
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|