bomdb 0.2.2 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 033e279990a8a889f27f0cfd0569d3e76e9345aa
4
- data.tar.gz: f8a5d832f7398bd0caeb5669785dd337519baa41
3
+ metadata.gz: 10684c60ce9c451d4c17ab58a0f102cf418d4b18
4
+ data.tar.gz: 9d383abefdc1e561179b681c0d7edfaa95bc4ea1
5
5
  SHA512:
6
- metadata.gz: 3ae1472a90909e6c194662317bd0d284a9cad4fdf30c5148ed7200f3623d9cdc0085292cc47751fc17f1a2c6099f443ee42f083617f464c469d3017c1e34be1e
7
- data.tar.gz: f832dd8e4625497a14985f9a9e2f684a9ca5025c201d869ac48093ac75cbb96b86eb9f1e21eb532e0e5ba6328c41fa6c70632bad14f1f9afb1cfde7f251fdf29
6
+ metadata.gz: 96ad2c76b012fcf6919ec86878e27eb5e98fd572c63b71731183bd8b1c07435e3f2bbacb13b99e897203a2d3cff67803fab8f973de98aaac197bb9e5d66890f9
7
+ data.tar.gz: 2e7bbdac26409d4a1eb316332598abc16df4493d32d7e241e54a59075293743c71b8fba0b3a4597018e808b8b2264e73e54e0092d04bdff6965de11e60b61b78
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bomdb (0.2.1)
4
+ bomdb (0.2.3)
5
5
  colorize (~> 0.7)
6
6
  constellation (~> 0.1)
7
+ levenshtein-ffi (~> 1.1)
7
8
  sequel (~> 4.21)
8
9
  sqlite3 (~> 1.3)
9
10
  text_clean
@@ -19,6 +20,9 @@ GEM
19
20
  constellation (0.1.1)
20
21
  multi_json
21
22
  diff-lcs (1.2.5)
23
+ ffi (1.9.3)
24
+ levenshtein-ffi (1.1.0)
25
+ ffi (~> 1.9)
22
26
  multi_json (1.11.0)
23
27
  rake (10.4.2)
24
28
  rspec (3.2.0)
data/README.md CHANGED
@@ -104,7 +104,7 @@ Bible-OT (594 refs)
104
104
  ```
105
105
 
106
106
  ```bash
107
- $bomdb references Bible-NT
107
+ $ bomdb references Bible-NT
108
108
  1 Corinthians 15:32
109
109
  Luke 12:19
110
110
  ```
@@ -135,7 +135,13 @@ Note that `align` requires the [dwdiff](http://linux.die.net/man/1/dwdiff) comma
135
135
 
136
136
  ## Installation
137
137
 
138
- Add this line to your application's Gemfile:
138
+ Ruby 2.1 is required. You should also have a normal build environment set up, e.g. command line tools on the mac, or GCC on Linux.
139
+
140
+ To install BomDB for use on the command line, use `gem install`:
141
+
142
+ $ gem install bomdb
143
+
144
+ To include bomdb in another Ruby app, add this line to your application's `Gemfile`:
139
145
 
140
146
  ```ruby
141
147
  source 'https://rubygems.org'
@@ -147,10 +153,6 @@ And then execute:
147
153
 
148
154
  $ bundle
149
155
 
150
- Or install it yourself as:
151
-
152
- $ gem install bomdb
153
-
154
156
  ## Configuration
155
157
 
156
158
  Some settings can be configured in a .bomdb file in your home directory:
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
  spec.bindir = "bin"
21
21
 
22
- spec.add_dependency 'sequel', '~> 4.21'
23
- spec.add_dependency 'sqlite3', '~> 1.3'
24
- spec.add_dependency 'thor', '~> 0.19'
25
- spec.add_dependency 'constellation', '~> 0.1'
26
- spec.add_dependency 'colorize', '~> 0.7'
22
+ spec.add_dependency 'sequel', '~> 4.21'
23
+ spec.add_dependency 'sqlite3', '~> 1.3'
24
+ spec.add_dependency 'thor', '~> 0.19'
25
+ spec.add_dependency 'constellation', '~> 0.1'
26
+ spec.add_dependency 'colorize', '~> 0.7'
27
27
  spec.add_dependency 'text_clean'
28
+ spec.add_dependency 'levenshtein-ffi', '~> 1.1'
28
29
 
29
30
  # spec.add_development_dependency 'bundler', '~> 1.7'
30
31
  spec.add_development_dependency 'rake', '~> 10.0'
Binary file
@@ -15,8 +15,6 @@ module BomDB
15
15
  end
16
16
  end
17
17
 
18
- require 'byebug'
19
-
20
18
  require 'bomdb/version'
21
19
  require 'bomdb/config'
22
20
  require 'bomdb/schema'
@@ -29,7 +29,12 @@ module BomDB
29
29
  exit -1
30
30
  end
31
31
 
32
- result = importer.import(read(file), format: format)
32
+ begin
33
+ result = importer.import(read(file), format: format)
34
+ rescue JSON::ParserError
35
+ puts "Couldn't parse as JSON. Use '--format=text'?"
36
+ exit -1
37
+ end
33
38
  show_result_and_maybe_exit(result)
34
39
  end
35
40
 
@@ -246,24 +251,21 @@ module BomDB
246
251
  end
247
252
 
248
253
  dwdiff = Diff::Dwdiff.new(options[:dwdiff])
249
- diff = dwdiff.diff(io.string, File.read(file))
254
+ align_str = File.read(file).gsub(/\s\s+/, ' ').gsub(':', '~')
255
+ diff = dwdiff.diff(io.string, align_str)
250
256
 
251
257
  if options[:'diff-only']
252
258
  puts diff
253
259
  exit
254
260
  end
255
261
 
256
- puts Diff::Aligner.parse(diff)
262
+ puts Diff::Aligner.parse(diff).gsub('~', ':')
257
263
  end
258
264
 
259
265
 
260
266
 
261
267
  private
262
268
 
263
- def datafile(file)
264
-
265
- end
266
-
267
269
  def read(file)
268
270
  File.read(relative_or_data_file(file))
269
271
  end
@@ -1,39 +1,53 @@
1
1
  require 'strscan'
2
+ require 'levenshtein'
2
3
 
3
4
  module BomDB
4
5
  module Diff
5
6
  class Aligner
6
7
  DIFF_RE = /\{(\+|\-)(.+?)\1\}/
7
8
  INSERT_RE = /\{\+(.+?)\+\}/
9
+ WS_INSERT_RE = /\s?\{\+(.+?)\+\}/
8
10
  VERSE_RE = /\[\|([^\]]+)\|\]/
9
11
 
10
- def self.parse_verse_heading(scanner, deletion, verse_match)
12
+ def self.parse_verse_heading(verse_match, deletion, insertion = nil)
11
13
  # the text of the verse, e.g. "1 Nephi 1:1"
12
14
  verse = verse_match[1]
13
-
14
- # the range of the verse capture, e.g. [2, 17] from ". [|1 Nephi 1:1|]Yea"
15
- verse_capture_slice = Range.new(*verse_match.offset(0), true)
16
-
17
- # the deletion without the verse, e.g. ". Yea"
18
- deletion_without_verse = deletion.clone
19
- deletion_without_verse.slice!(verse_capture_slice)
15
+ before = after = ''
20
16
 
21
17
  # if there's an insertion immediately following...
22
- if scanner.scan(INSERT_RE)
23
- insertion = scanner.matched.match(INSERT_RE)[1]
24
- insert_pos = verse_match.offset(0).first
18
+ if insertion
19
+ # we can assume split will succeed, because the verse was matched
20
+ del_before, del_after = deletion.split(verse_match[0], 2)
25
21
 
26
- # if the match, without the verse heading, is the same size as its
27
- # substitution, then concat the pre_match, add the verse heading, and
28
- # concat the post_match
29
- if insertion.size > insert_pos
30
- insertion[0...insert_pos] + "\n" + verse + insertion[(insert_pos-1)..-1]
22
+ del_before.strip!
23
+ del_after.strip!
24
+
25
+ if del_before.empty? && del_after.empty?
26
+ # do nothing
27
+ elsif del_before.empty?
28
+ # the entire insertion goes after the verse heading
29
+ after = insertion.chomp
30
+ elsif del_after.empty?
31
+ # the entire insertion goes before the verse heading
32
+ before = insertion.chomp
31
33
  else
32
- insertion[0...insert_pos] + "\n" + verse
34
+ # we have to use some heuristics to figure out where to split
35
+ # the insertion.
36
+
37
+ candidates = (0..(insertion.size-1)).map do |i|
38
+ d1 = Levenshtein.distance(del_before, insertion[0..i])
39
+ d2 = Levenshtein.distance(del_after, insertion[(i + 1)..-1])
40
+ d3 = insertion[i] == ' ' ? 1 : 0
41
+ [ d1 + d2 + d3, insertion[0..i].chomp, insertion[(i + 1)..-1].chomp ]
42
+ end.sort_by{ |a| a.first }
43
+ if candidates.empty?
44
+ raise "Unable to find candidate split for #{del_before.inspect}, #{del_after.inspect} on #{insertion.inspect}"
45
+ end
46
+
47
+ score, before, after = candidates.first
33
48
  end
34
- else
35
- "\n" + verse
36
49
  end
50
+ [before, verse, after]
37
51
  end
38
52
 
39
53
  def self.parse(diff_text)
@@ -50,10 +64,22 @@ module BomDB
50
64
  diff_match = DIFF_RE.match(scanner.matched)
51
65
  case diff_match[1]
52
66
  when '-' then # this is a deletion
53
- inner = diff_match[2]
67
+
68
+ delete_inner = diff_match[2] # e.g. ", [|1 Nephi 1:1|] I"
69
+ # see if there's a verse heading in delete_inner
70
+ verse_match = VERSE_RE.match(delete_inner)
71
+
54
72
  # the only deletions we care about are those with verse headings inside them
55
- if verse_match = VERSE_RE.match(inner)
56
- output << parse_verse_heading(scanner, inner, verse_match)
73
+ if verse_match
74
+ if scanner.scan(WS_INSERT_RE)
75
+ ws_insert_match = WS_INSERT_RE.match(scanner.matched)
76
+ insert_inner = ws_insert_match[1]
77
+ else
78
+ insert_inner = nil
79
+ end
80
+ before, verse, after = parse_verse_heading(verse_match, delete_inner, insert_inner)
81
+ output << before + "\n" + verse
82
+ output << " " + after
57
83
  last_pos = scanner.pos
58
84
  end
59
85
  when '+' then # this is an insertion
@@ -65,7 +91,7 @@ module BomDB
65
91
  end
66
92
  end
67
93
 
68
- return output
94
+ return output.gsub(/ +/, ' ').gsub(/ +$/, '')
69
95
  end
70
96
  end
71
97
  end
@@ -7,6 +7,7 @@ module BomDB
7
7
  tables :books, :verses, :editions, :contents
8
8
  DEFAULT_VERSE_CONTENT_RE = /^\s*(.+)\s+(\d+):(\d+)\s+(.*)$/
9
9
  DEFAULT_VERSE_REF_RE = /^\s*(.+)\s+(\d+):(\d+)$/
10
+ MAX_DUPS = 5
10
11
 
11
12
  def import_text(data)
12
13
  if opts[:edition_prefix].nil?
@@ -23,9 +24,11 @@ module BomDB
23
24
  error: "Edition matching prefix '#{opts[:edition_prefix]}' not found"
24
25
  )
25
26
  end
27
+ edition_id = edition[:edition_id]
26
28
 
27
29
  verse_re = opts[:verse_re] || DEFAULT_VERSE_CONTENT_RE
28
30
 
31
+ times_tried = 0
29
32
  data.each_line do |line|
30
33
  if line =~ verse_re
31
34
  book_name, chapter, verse, content = $1, $2, $3, $4
@@ -39,11 +42,24 @@ module BomDB
39
42
  book_id: book[:book_id]
40
43
  )
41
44
 
42
- @db[:contents].insert(
43
- edition_id: edition[:edition_id],
44
- verse_id: verse_id,
45
- content_body: content
46
- )
45
+ begin
46
+ @db[:contents].insert(
47
+ edition_id: edition_id,
48
+ verse_id: verse_id,
49
+ content_body: content
50
+ )
51
+ rescue Sequel::UniqueConstraintViolation => e
52
+ msg = "edition_id: #{edition_id}, verse: '#{book_name} #{chapter}:#{verse}', content: #{content.inspect}"
53
+ $stderr.puts "Warning: duplicate #{msg}"
54
+ times_tried += 1
55
+ if times_tried > MAX_DUPS
56
+ return Import::Result.new(success: false,
57
+ error: "Too many duplicate rows. Stopped at #{msg}"
58
+ )
59
+ else
60
+ next
61
+ end
62
+ end
47
63
  end
48
64
  end
49
65
  Import::Result.new(success: true)
@@ -1,3 +1,3 @@
1
1
  module BomDB
2
- VERSION = "0.2.2"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bomdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Duane Johnson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-12 00:00:00.000000000 Z
11
+ date: 2015-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: levenshtein-ffi
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.1'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.1'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: rake
99
113
  requirement: !ruby/object:Gem::Requirement