iev 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/iev/db.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # require 'pstore'
2
2
  require_relative "db_cache"
3
3
 
4
- module Iev
4
+ module IEV
5
5
  # Cache class.
6
6
  class Db
7
7
  # @param global_cache [String] filename of global DB
@@ -43,12 +43,12 @@ module Iev
43
43
 
44
44
  # @return [Hash]
45
45
  def new_bib_entry(code, lang)
46
- Iev.get(code, lang)
46
+ IEV.get(code, lang)
47
47
  end
48
48
 
49
49
  # @param dir [String] DB dir
50
50
  # @param global [TrueClass, FalseClass]
51
- # @return [Iev::DbCache, nil]
51
+ # @return [IEV::DbCache, nil]
52
52
  def open_cache_biblio(dir, global: true)
53
53
  return nil if dir.nil?
54
54
 
data/lib/iev/db_cache.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require "fileutils"
2
2
 
3
- module Iev
3
+ module IEV
4
4
  class DbCache
5
5
  # @return [String]
6
6
  attr_reader :dir
@@ -73,7 +73,7 @@ module Iev
73
73
  end
74
74
 
75
75
  # Set version of the DB to the gem version.
76
- # @return [Iev::DbCache]
76
+ # @return [IEV::DbCache]
77
77
  def set_version
78
78
  File.write "#{@dir}/version", VERSION, encoding: "utf-8"
79
79
  self
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ # (c) Copyright 2020 Ribose Inc.
4
+ #
5
+
6
module IEV
  # Imports the rows of a spreadsheet into the +concepts+ table of the given
  # database.
  class DbWriter
    include CLI::UI
    using DataConversions

    # Database handle the writer operates on.
    # NOTE(review): presumably a Sequel::Database, judging by the
    # +create_table!+ call and the Sequel exception rescued below — confirm.
    attr_reader :db

    # @param db database handle used for table creation and inserts
    def initialize(db)
      @db = db
    end

    # Reads the first sheet of +file+, recreates the +concepts+ table from
    # its title row, and inserts every non-empty row that follows.
    #
    # @param file spreadsheet to import, handed to +Creek::Book.new+ as is
    def import_spreadsheet(file)
      Profiler.measure("xlsx-import") do
        rows = open_workbook(file).sheets.first.simple_rows.each

        # The first row holds the column titles; cells without a title are
        # dropped and the remaining titles become symbols.
        headers = rows.next.compact.transform_values(&:to_sym)
        create_table(headers.values)

        # Kernel#loop ends quietly when the enumerator raises StopIteration,
        # that is, once the sheet has no more rows.
        loop do
          cells = rows.next
          next if cells.empty?

          record = prepare_data(cells, headers)
          display_progress(record)
          insert_data(record)
        end
      end
    end

    private

    # Announces the operation and opens the spreadsheet.
    #
    # @param file spreadsheet to open
    # @return [Creek::Book]
    def open_workbook(file)
      info("Opening spreadsheet...")
      Creek::Book.new(file)
    end

    # Creates a database table which is going to be filled with data extracted
    # from the spreadsheet.  The first two columns form the primary key and
    # each of them is indexed separately as well.
    #
    # Note that columns are defined as +VARCHAR(255)+, but they can store
    # strings of any length without truncating, see:
    # https://www.sqlite.org/faq.html#q9
    #
    # @param column_names [Array<Symbol>] column names, in spreadsheet order
    def create_table(column_names)
      first_column, second_column = column_names

      db.create_table!(:concepts) do
        column_names.each do |column_name|
          column column_name, String
        end

        primary_key column_names[0..1], name: :iev_pk
        index first_column
        index second_column
      end
    end

    # Replaces A, B, C… keys with real column names and sanitizes cell
    # content.  Cells holding +nil+ stay +nil+; the passed row is not
    # modified.
    #
    # @param row [Hash] one spreadsheet row, keyed by column letter
    # @param title_row [Hash] column letter => column name
    # @return [Hash] row keyed by column name
    def prepare_data(row, title_row)
      row
        .transform_keys { |letter| title_row[letter] }
        .transform_values { |cell| cell&.sanitize }
    end

    # Tags subsequent UI output with the term being imported and reports
    # progress for it.
    #
    # @param data [Hash] prepared row; +:IEVREF+ and +:LANGUAGE+ are read
    def display_progress(data)
      term_label = "#{data[:IEVREF]} (#{data[:LANGUAGE].to_three_char_code})"

      set_ui_tag term_label
      progress "Importing term #{term_label}..."
    end

    # Inserts one prepared row into the +concepts+ table.  A row that
    # violates a uniqueness constraint is reported with a warning and
    # skipped instead of aborting the import.
    #
    # @param data [Hash] prepared row, keyed by column name
    def insert_data(data)
      concepts = db[:concepts]
      concepts.insert(data)
    rescue Sequel::UniqueConstraintViolation
      warn "Duplicated (TERMID, LANGUAGE) pair, skipping"
    end
  end
end