pdf-reader-find_text 0.1.1 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19766f6210ef79a6a29a22f6c0f501c8acc2cb18d539511a8fd163962735264f
4
- data.tar.gz: 101ced07a4294793b2f7b8addb54e7df2f25a254fc5c88e0c5f6f2c13dba6453
3
+ metadata.gz: 6d2b61e7bafca6cb75a3c735e6f3637f34ea93e0ff127dd6eaf40463ce9938b4
4
+ data.tar.gz: '08a3288e56720186c074899434874601f728bed07b7f571ca145f3a40f586a41'
5
5
  SHA512:
6
- metadata.gz: 27d305f423497fbf1bdb14be0eb777250bd36e6b16f673609e3d193fd9ca317eb7e03b8669448c8987c39bfd15ccabeedc8f823fad654567da08b960d5e0a787
7
- data.tar.gz: 4ac4586a856edc0202b6ce60819de435b4786c369a1ab3e022ccf5907188184b7d2b89b19fd027a6bc8495e81ea0aa41f6761c5881531bf778ebf0710bf3d0a1
6
+ metadata.gz: 798c0e1122ca9d9671ab6dbba9d83f8383fd5eefc07453be11b024c2f3b8c8c1416f9c56c16690d6e01dd262451177768a0069b966acee74f53424d10c82f51f
7
+ data.tar.gz: ee27dbabc732613b4f4bc2361c23fb4e56e01113fff00fa0fd028b0ad1f931d4b719d41d6ce43267bdbabd04eced66a8aae2f55b2f60a26bbfc33703ef2dfad3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.2] - 2022-02-24
4
+
5
+ - Handle strings with spaces in a better way
6
+
7
+ ## [0.1.1] - 2022-02-23
8
+
9
+ - Remove spaces before looking for text
10
+
3
11
  ## [0.1.0] - 2022-02-23
4
12
 
5
13
  - Initial release
@@ -3,7 +3,7 @@
3
3
  module PDF
4
4
  class Reader
5
5
  module FindText
6
- VERSION = "0.1.1"
6
+ VERSION = "0.1.2"
7
7
  end
8
8
  end
9
9
  end
@@ -5,14 +5,22 @@ require "pdf-reader"
5
5
  require_relative "find_text/version"
6
6
 
7
7
  module PDF::Reader::FindText
8
- def find_text(text)
9
- text = text.tr(' ', '')
8
+ def find_text(value)
9
+ runs(merge: false).each_cons(value.tr(' ', '').size).map do |chars|
10
+ string = merge_runs_with_max_length(chars, value.size)
11
+ string if string.text[0, value.size] == value
12
+ end.compact
13
+ end
14
+
15
+ private
10
16
 
11
- runs(merge: false).each_cons(text.size).select do |r|
12
- r.map(&:text).join == text
13
- end.map do |r|
14
- PDF::Reader::TextRun.new r.first.x, r.first.y,
15
- r.sum(&:width), r.map(&:font_size).max, r.map(&:text).join
17
+ def merge_runs_with_max_length(chars, length)
18
+ chars.inject do |string, char|
19
+ if string.mergable?(char) && string.text.size < length
20
+ string + char
21
+ else
22
+ string
23
+ end
16
24
  end
17
25
  end
18
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader-find_text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Guillaume Dott
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-02-23 00:00:00.000000000 Z
11
+ date: 2022-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader