nameday_vvc_pdf_extractor 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 443afb595bb2e7c47350fb02f047bdae5703fadabb3bad7b18573875ebc5ea87
4
- data.tar.gz: 61d36adce7e2df9f4f66ff3d0a3e258f184c6cbba47a862558048c2cd78f360a
3
+ metadata.gz: d4e37438d8abad8fdd52b4cfceacb64358dc2df629ba34f5c1c3b0ebb9673e5f
4
+ data.tar.gz: e1c2b0e924bf98d906bca47bca2babf889dfe2992778bf372ed3a1134f8e9d1b
5
5
  SHA512:
6
- metadata.gz: ba2e585871816be38ca58fcdcdce520766203b6ab20860dc280fa287761583071b1d5469ae513d2917aad8aaebab3b0095daf197453a29c9324fe55b75ad3313
7
- data.tar.gz: 2284f690cdd3af7dfa87b86b0ee75383416f9700d09c8c6c5d2d57ea7ab1b155ea54c0da07706fe4de051116cb86d3b406fd0421fde743e5f97ef4b41341f494
6
+ metadata.gz: fcfc82617988a038a37e31495e33e74a54809dbeffe0283b326b2ac0c2feaea4348bc4899c72dd89a0d8561b4efd16980cfb9b2a97b6218157e5ad1dc9f86831
7
+ data.tar.gz: 1e5c09a644d3efca24b87b9f3672eb18c81ed97f8ad2d43b720b6a5025277c4f5bd539566682b1dca9469d2d8b1dc5936305d8576cbee72ac194d523015c54c2
@@ -4,6 +4,8 @@ require "pdf-reader"
4
4
  require "date"
5
5
 
6
6
  module Nameday
7
+ # Use this class to extract structured nameday information
8
+ # from pre-existing VVC PDF file
7
9
  class VvcPdfExtractor
8
10
  EMPTY_NAMEDAY_REGEXP = /\p{Pd}/ # Unicode category "Punctuation: Dash"
9
11
  TEXT_ROW_DELIMITER = "\n"
@@ -26,6 +28,7 @@ module Nameday
26
28
  attr_reader :output
27
29
 
28
30
  def initialize
31
+ @pdf_reader = nil
29
32
  @output = {}
30
33
  end
31
34
 
@@ -35,6 +38,7 @@ module Nameday
35
38
 
36
39
  def extract
37
40
  raise("PDF not opened!") unless @pdf_reader
41
+
38
42
  process_pdf
39
43
  output
40
44
  end
@@ -53,24 +57,29 @@ module Nameday
53
57
 
54
58
  def process_pdf
55
59
  return unless @output == {}
60
+
56
61
  prepare_output
57
62
 
58
63
  @current_month_index = nil
59
- @pdf_reader.pages.each do |page|
60
- process_pdf_page(page)
64
+ @pdf_reader.pages.each do |pdf_page|
65
+ process_pdf_page(pdf_page)
61
66
  end
62
67
  end
63
68
 
64
69
  def process_pdf_page(pdf_page)
65
- text_rows = pdf_page.text.split(TEXT_ROW_DELIMITER).map!(&:strip)
70
+ text_rows = pdf_page.text.split(TEXT_ROW_DELIMITER).map(&:strip)
66
71
  text_rows.each do |text_row|
67
72
  next if text_row.empty?
68
73
 
69
- if (new_month_index = MONTH_NAMES.index(text_row))
70
- @current_month_index = new_month_index
71
- elsif text_row.match?(/^\d+\./)
72
- process_nameday_value(text_row)
73
- end
74
+ process_text_row(text_row)
75
+ end
76
+ end
77
+
78
+ def process_text_row(text_row)
79
+ if (new_month_index = MONTH_NAMES.index(text_row))
80
+ @current_month_index = new_month_index
81
+ elsif text_row.match?(/^\d+\./)
82
+ process_nameday_value(text_row)
74
83
  end
75
84
  end
76
85
 
@@ -80,6 +89,7 @@ module Nameday
80
89
 
81
90
  nameday_data[1].split(",").each do |name|
82
91
  next if name.match?(EMPTY_NAMEDAY_REGEXP)
92
+
83
93
  @output[@current_month_index][day] ||= []
84
94
  @output[@current_month_index][day] << name.strip
85
95
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nameday_vvc_pdf_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandrs Ļedovskis