nameday_vvc_pdf_extractor 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 443afb595bb2e7c47350fb02f047bdae5703fadabb3bad7b18573875ebc5ea87
4
- data.tar.gz: 61d36adce7e2df9f4f66ff3d0a3e258f184c6cbba47a862558048c2cd78f360a
3
+ metadata.gz: d4e37438d8abad8fdd52b4cfceacb64358dc2df629ba34f5c1c3b0ebb9673e5f
4
+ data.tar.gz: e1c2b0e924bf98d906bca47bca2babf889dfe2992778bf372ed3a1134f8e9d1b
5
5
  SHA512:
6
- metadata.gz: ba2e585871816be38ca58fcdcdce520766203b6ab20860dc280fa287761583071b1d5469ae513d2917aad8aaebab3b0095daf197453a29c9324fe55b75ad3313
7
- data.tar.gz: 2284f690cdd3af7dfa87b86b0ee75383416f9700d09c8c6c5d2d57ea7ab1b155ea54c0da07706fe4de051116cb86d3b406fd0421fde743e5f97ef4b41341f494
6
+ metadata.gz: fcfc82617988a038a37e31495e33e74a54809dbeffe0283b326b2ac0c2feaea4348bc4899c72dd89a0d8561b4efd16980cfb9b2a97b6218157e5ad1dc9f86831
7
+ data.tar.gz: 1e5c09a644d3efca24b87b9f3672eb18c81ed97f8ad2d43b720b6a5025277c4f5bd539566682b1dca9469d2d8b1dc5936305d8576cbee72ac194d523015c54c2
@@ -4,6 +4,8 @@ require "pdf-reader"
4
4
  require "date"
5
5
 
6
6
  module Nameday
7
+ # Use this class to extract structured nameday information
8
+ # from pre-existing VVC PDF file
7
9
  class VvcPdfExtractor
8
10
  EMPTY_NAMEDAY_REGEXP = /\p{Pd}/ # Unicode category "Punctuation: Dash"
9
11
  TEXT_ROW_DELIMITER = "\n"
@@ -26,6 +28,7 @@ module Nameday
26
28
  attr_reader :output
27
29
 
28
30
  def initialize
31
+ @pdf_reader = nil
29
32
  @output = {}
30
33
  end
31
34
 
@@ -35,6 +38,7 @@ module Nameday
35
38
 
36
39
  def extract
37
40
  raise("PDF not opened!") unless @pdf_reader
41
+
38
42
  process_pdf
39
43
  output
40
44
  end
@@ -53,24 +57,29 @@ module Nameday
53
57
 
54
58
  def process_pdf
55
59
  return unless @output == {}
60
+
56
61
  prepare_output
57
62
 
58
63
  @current_month_index = nil
59
- @pdf_reader.pages.each do |page|
60
- process_pdf_page(page)
64
+ @pdf_reader.pages.each do |pdf_page|
65
+ process_pdf_page(pdf_page)
61
66
  end
62
67
  end
63
68
 
64
69
  def process_pdf_page(pdf_page)
65
- text_rows = pdf_page.text.split(TEXT_ROW_DELIMITER).map!(&:strip)
70
+ text_rows = pdf_page.text.split(TEXT_ROW_DELIMITER).map(&:strip)
66
71
  text_rows.each do |text_row|
67
72
  next if text_row.empty?
68
73
 
69
- if (new_month_index = MONTH_NAMES.index(text_row))
70
- @current_month_index = new_month_index
71
- elsif text_row.match?(/^\d+\./)
72
- process_nameday_value(text_row)
73
- end
74
+ process_text_row(text_row)
75
+ end
76
+ end
77
+
78
+ def process_text_row(text_row)
79
+ if (new_month_index = MONTH_NAMES.index(text_row))
80
+ @current_month_index = new_month_index
81
+ elsif text_row.match?(/^\d+\./)
82
+ process_nameday_value(text_row)
74
83
  end
75
84
  end
76
85
 
@@ -80,6 +89,7 @@ module Nameday
80
89
 
81
90
  nameday_data[1].split(",").each do |name|
82
91
  next if name.match?(EMPTY_NAMEDAY_REGEXP)
92
+
83
93
  @output[@current_month_index][day] ||= []
84
94
  @output[@current_month_index][day] << name.strip
85
95
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nameday_vvc_pdf_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandrs Ļedovskis