bomdb 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 033e279990a8a889f27f0cfd0569d3e76e9345aa
4
- data.tar.gz: f8a5d832f7398bd0caeb5669785dd337519baa41
3
+ metadata.gz: 10684c60ce9c451d4c17ab58a0f102cf418d4b18
4
+ data.tar.gz: 9d383abefdc1e561179b681c0d7edfaa95bc4ea1
5
5
  SHA512:
6
- metadata.gz: 3ae1472a90909e6c194662317bd0d284a9cad4fdf30c5148ed7200f3623d9cdc0085292cc47751fc17f1a2c6099f443ee42f083617f464c469d3017c1e34be1e
7
- data.tar.gz: f832dd8e4625497a14985f9a9e2f684a9ca5025c201d869ac48093ac75cbb96b86eb9f1e21eb532e0e5ba6328c41fa6c70632bad14f1f9afb1cfde7f251fdf29
6
+ metadata.gz: 96ad2c76b012fcf6919ec86878e27eb5e98fd572c63b71731183bd8b1c07435e3f2bbacb13b99e897203a2d3cff67803fab8f973de98aaac197bb9e5d66890f9
7
+ data.tar.gz: 2e7bbdac26409d4a1eb316332598abc16df4493d32d7e241e54a59075293743c71b8fba0b3a4597018e808b8b2264e73e54e0092d04bdff6965de11e60b61b78
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- bomdb (0.2.1)
4
+ bomdb (0.2.3)
5
5
  colorize (~> 0.7)
6
6
  constellation (~> 0.1)
7
+ levenshtein-ffi (~> 1.1)
7
8
  sequel (~> 4.21)
8
9
  sqlite3 (~> 1.3)
9
10
  text_clean
@@ -19,6 +20,9 @@ GEM
19
20
  constellation (0.1.1)
20
21
  multi_json
21
22
  diff-lcs (1.2.5)
23
+ ffi (1.9.3)
24
+ levenshtein-ffi (1.1.0)
25
+ ffi (~> 1.9)
22
26
  multi_json (1.11.0)
23
27
  rake (10.4.2)
24
28
  rspec (3.2.0)
data/README.md CHANGED
@@ -104,7 +104,7 @@ Bible-OT (594 refs)
104
104
  ```
105
105
 
106
106
  ```bash
107
- $bomdb references Bible-NT
107
+ $ bomdb references Bible-NT
108
108
  1 Corinthians 15:32
109
109
  Luke 12:19
110
110
  ```
@@ -135,7 +135,13 @@ Note that `align` requires the [dwdiff](http://linux.die.net/man/1/dwdiff) comma
135
135
 
136
136
  ## Installation
137
137
 
138
- Add this line to your application's Gemfile:
138
+ Ruby 2.1 is required. You should also have a normal build environment set up, e.g. the command line tools on a Mac, or GCC on Linux.
139
+
140
+ To install BomDB for use on the command line, use `gem install`:
141
+
142
+ $ gem install bomdb
143
+
144
+ To include bomdb in another Ruby app, add this line to your application's `Gemfile`:
139
145
 
140
146
  ```ruby
141
147
  source 'https://rubygems.org'
@@ -147,10 +153,6 @@ And then execute:
147
153
 
148
154
  $ bundle
149
155
 
150
- Or install it yourself as:
151
-
152
- $ gem install bomdb
153
-
154
156
  ## Configuration
155
157
 
156
158
  Some settings can be configured in a .bomdb file in your home directory:
@@ -19,12 +19,13 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
  spec.bindir = "bin"
21
21
 
22
- spec.add_dependency 'sequel', '~> 4.21'
23
- spec.add_dependency 'sqlite3', '~> 1.3'
24
- spec.add_dependency 'thor', '~> 0.19'
25
- spec.add_dependency 'constellation', '~> 0.1'
26
- spec.add_dependency 'colorize', '~> 0.7'
22
+ spec.add_dependency 'sequel', '~> 4.21'
23
+ spec.add_dependency 'sqlite3', '~> 1.3'
24
+ spec.add_dependency 'thor', '~> 0.19'
25
+ spec.add_dependency 'constellation', '~> 0.1'
26
+ spec.add_dependency 'colorize', '~> 0.7'
27
27
  spec.add_dependency 'text_clean'
28
+ spec.add_dependency 'levenshtein-ffi', '~> 1.1'
28
29
 
29
30
  # spec.add_development_dependency 'bundler', '~> 1.7'
30
31
  spec.add_development_dependency 'rake', '~> 10.0'
Binary file
@@ -15,8 +15,6 @@ module BomDB
15
15
  end
16
16
  end
17
17
 
18
- require 'byebug'
19
-
20
18
  require 'bomdb/version'
21
19
  require 'bomdb/config'
22
20
  require 'bomdb/schema'
@@ -29,7 +29,12 @@ module BomDB
29
29
  exit -1
30
30
  end
31
31
 
32
- result = importer.import(read(file), format: format)
32
+ begin
33
+ result = importer.import(read(file), format: format)
34
+ rescue JSON::ParserError
35
+ puts "Couldn't parse as JSON. Use '--format=text'?"
36
+ exit -1
37
+ end
33
38
  show_result_and_maybe_exit(result)
34
39
  end
35
40
 
@@ -246,24 +251,21 @@ module BomDB
246
251
  end
247
252
 
248
253
  dwdiff = Diff::Dwdiff.new(options[:dwdiff])
249
- diff = dwdiff.diff(io.string, File.read(file))
254
+ align_str = File.read(file).gsub(/\s\s+/, ' ').gsub(':', '~')
255
+ diff = dwdiff.diff(io.string, align_str)
250
256
 
251
257
  if options[:'diff-only']
252
258
  puts diff
253
259
  exit
254
260
  end
255
261
 
256
- puts Diff::Aligner.parse(diff)
262
+ puts Diff::Aligner.parse(diff).gsub('~', ':')
257
263
  end
258
264
 
259
265
 
260
266
 
261
267
  private
262
268
 
263
- def datafile(file)
264
-
265
- end
266
-
267
269
  def read(file)
268
270
  File.read(relative_or_data_file(file))
269
271
  end
@@ -1,39 +1,53 @@
1
1
  require 'strscan'
2
+ require 'levenshtein'
2
3
 
3
4
  module BomDB
4
5
  module Diff
5
6
  class Aligner
6
7
  DIFF_RE = /\{(\+|\-)(.+?)\1\}/
7
8
  INSERT_RE = /\{\+(.+?)\+\}/
9
+ WS_INSERT_RE = /\s?\{\+(.+?)\+\}/
8
10
  VERSE_RE = /\[\|([^\]]+)\|\]/
9
11
 
10
- def self.parse_verse_heading(scanner, deletion, verse_match)
12
+ def self.parse_verse_heading(verse_match, deletion, insertion = nil)
11
13
  # the text of the verse, e.g. "1 Nephi 1:1"
12
14
  verse = verse_match[1]
13
-
14
- # the range of the verse capture, e.g. [2, 17] from ". [|1 Nephi 1:1|]Yea"
15
- verse_capture_slice = Range.new(*verse_match.offset(0), true)
16
-
17
- # the deletion without the verse, e.g. ". Yea"
18
- deletion_without_verse = deletion.clone
19
- deletion_without_verse.slice!(verse_capture_slice)
15
+ before = after = ''
20
16
 
21
17
  # if there's an insertion immediately following...
22
- if scanner.scan(INSERT_RE)
23
- insertion = scanner.matched.match(INSERT_RE)[1]
24
- insert_pos = verse_match.offset(0).first
18
+ if insertion
19
+ # we can assume split will succeed, because the verse was matched
20
+ del_before, del_after = deletion.split(verse_match[0], 2)
25
21
 
26
- # if the match, without the verse heading, is the same size as its
27
- # substitution, then concat the pre_match, add the verse heading, and
28
- # concat the post_match
29
- if insertion.size > insert_pos
30
- insertion[0...insert_pos] + "\n" + verse + insertion[(insert_pos-1)..-1]
22
+ del_before.strip!
23
+ del_after.strip!
24
+
25
+ if del_before.empty? && del_after.empty?
26
+ # do nothing
27
+ elsif del_before.empty?
28
+ # the entire insertion goes after the verse heading
29
+ after = insertion.chomp
30
+ elsif del_after.empty?
31
+ # the entire insertion goes before the verse heading
32
+ before = insertion.chomp
31
33
  else
32
- insertion[0...insert_pos] + "\n" + verse
34
+ # we have to use some heuristics to figure out where to split
35
+ # the insertion.
36
+
37
+ candidates = (0..(insertion.size-1)).map do |i|
38
+ d1 = Levenshtein.distance(del_before, insertion[0..i])
39
+ d2 = Levenshtein.distance(del_after, insertion[(i + 1)..-1])
40
+ d3 = insertion[i] == ' ' ? 1 : 0
41
+ [ d1 + d2 + d3, insertion[0..i].chomp, insertion[(i + 1)..-1].chomp ]
42
+ end.sort_by{ |a| a.first }
43
+ if candidates.empty?
44
+ raise "Unable to find candidate split for #{del_before.inspect}, #{del_after.inspect} on #{insertion.inspect}"
45
+ end
46
+
47
+ score, before, after = candidates.first
33
48
  end
34
- else
35
- "\n" + verse
36
49
  end
50
+ [before, verse, after]
37
51
  end
38
52
 
39
53
  def self.parse(diff_text)
@@ -50,10 +64,22 @@ module BomDB
50
64
  diff_match = DIFF_RE.match(scanner.matched)
51
65
  case diff_match[1]
52
66
  when '-' then # this is a deletion
53
- inner = diff_match[2]
67
+
68
+ delete_inner = diff_match[2] # e.g. ", [|1 Nephi 1:1|] I"
69
+ # see if there's a verse heading in delete_inner
70
+ verse_match = VERSE_RE.match(delete_inner)
71
+
54
72
  # the only deletions we care about are those with verse headings inside them
55
- if verse_match = VERSE_RE.match(inner)
56
- output << parse_verse_heading(scanner, inner, verse_match)
73
+ if verse_match
74
+ if scanner.scan(WS_INSERT_RE)
75
+ ws_insert_match = WS_INSERT_RE.match(scanner.matched)
76
+ insert_inner = ws_insert_match[1]
77
+ else
78
+ insert_inner = nil
79
+ end
80
+ before, verse, after = parse_verse_heading(verse_match, delete_inner, insert_inner)
81
+ output << before + "\n" + verse
82
+ output << " " + after
57
83
  last_pos = scanner.pos
58
84
  end
59
85
  when '+' then # this is an insertion
@@ -65,7 +91,7 @@ module BomDB
65
91
  end
66
92
  end
67
93
 
68
- return output
94
+ return output.gsub(/ +/, ' ').gsub(/ +$/, '')
69
95
  end
70
96
  end
71
97
  end
@@ -7,6 +7,7 @@ module BomDB
7
7
  tables :books, :verses, :editions, :contents
8
8
  DEFAULT_VERSE_CONTENT_RE = /^\s*(.+)\s+(\d+):(\d+)\s+(.*)$/
9
9
  DEFAULT_VERSE_REF_RE = /^\s*(.+)\s+(\d+):(\d+)$/
10
+ MAX_DUPS = 5
10
11
 
11
12
  def import_text(data)
12
13
  if opts[:edition_prefix].nil?
@@ -23,9 +24,11 @@ module BomDB
23
24
  error: "Edition matching prefix '#{opts[:edition_prefix]}' not found"
24
25
  )
25
26
  end
27
+ edition_id = edition[:edition_id]
26
28
 
27
29
  verse_re = opts[:verse_re] || DEFAULT_VERSE_CONTENT_RE
28
30
 
31
+ times_tried = 0
29
32
  data.each_line do |line|
30
33
  if line =~ verse_re
31
34
  book_name, chapter, verse, content = $1, $2, $3, $4
@@ -39,11 +42,24 @@ module BomDB
39
42
  book_id: book[:book_id]
40
43
  )
41
44
 
42
- @db[:contents].insert(
43
- edition_id: edition[:edition_id],
44
- verse_id: verse_id,
45
- content_body: content
46
- )
45
+ begin
46
+ @db[:contents].insert(
47
+ edition_id: edition_id,
48
+ verse_id: verse_id,
49
+ content_body: content
50
+ )
51
+ rescue Sequel::UniqueConstraintViolation => e
52
+ msg = "edition_id: #{edition_id}, verse: '#{book_name} #{chapter}:#{verse}', content: #{content.inspect}"
53
+ $stderr.puts "Warning: duplicate #{msg}"
54
+ times_tried += 1
55
+ if times_tried > MAX_DUPS
56
+ return Import::Result.new(success: false,
57
+ error: "Too many duplicate rows. Stopped at #{msg}"
58
+ )
59
+ else
60
+ next
61
+ end
62
+ end
47
63
  end
48
64
  end
49
65
  Import::Result.new(success: true)
@@ -1,3 +1,3 @@
1
1
  module BomDB
2
- VERSION = "0.2.2"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bomdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Duane Johnson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-12 00:00:00.000000000 Z
11
+ date: 2015-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: levenshtein-ffi
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.1'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.1'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: rake
99
113
  requirement: !ruby/object:Gem::Requirement