cv-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +14 -0
- data/LICENSE.txt +21 -0
- data/README.md +380 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/cv-parser +8 -0
- data/lib/cv_parser/cli.rb +224 -0
- data/lib/cv_parser/configuration.rb +23 -0
- data/lib/cv_parser/errors.rb +14 -0
- data/lib/cv_parser/extractor.rb +58 -0
- data/lib/cv_parser/pdf_converter.rb +495 -0
- data/lib/cv_parser/providers/anthropic.rb +249 -0
- data/lib/cv_parser/providers/base.rb +119 -0
- data/lib/cv_parser/providers/faker.rb +215 -0
- data/lib/cv_parser/providers/openai.rb +395 -0
- data/lib/cv_parser/version.rb +5 -0
- data/lib/cv_parser.rb +37 -0
- metadata +192 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e6b7590ababb813164420d1ef2b1d648c3cc215f6470c8d4c8d3ba00e62e51ee
|
4
|
+
data.tar.gz: 77f236b9d6c7fd62f7a1979b747f944cdcbd59a0329c6917fb079fd3842fc238
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 565ea71844fe6be24392a19904c7690a91f017b1e5dbd6354587620b10f529a2cff4459d9cc995b737aa982d3b7987d2a686d3cc67f43600e49372e82865f375
|
7
|
+
data.tar.gz: 5703710f848b986b895b98561396d550ff94def445345dc5f2e9eb5fd8bac6b15943eabf16ba426d2cffaf22a446e0aa526fbb78caa98ad2b6d068300d7ce19e
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
|
+
|
8
|
+
## [Unreleased]
|
9
|
+
|
10
|
+
## [0.1.0] - 2025-06-01
|
11
|
+
|
12
|
+
### Added
|
13
|
+
- Initial release of cv-parser gem
|
14
|
+
- Basic project structure and configuration
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 Your Name
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,380 @@
|
|
1
|
+
# CV Parser
|
2
|
+
|
3
|
+
A Ruby gem for parsing and extracting structured information from CVs/resumes using LLM providers.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
- Convert DOCX to PDF before uploading to LLM providers
|
7
|
+
- Extract structured data from CVs by directly uploading files to LLM providers
|
8
|
+
- Configure different LLM providers (OpenAI, Anthropic, and Faker for testing)
|
9
|
+
- Customizable output schema to match your data requirements (JSON Schema format)
|
10
|
+
- Command-line interface for quick parsing and analysis
|
11
|
+
- Robust error handling and validation
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'cv-parser'
|
19
|
+
```
|
20
|
+
|
21
|
+
And then execute:
|
22
|
+
|
23
|
+
```bash
|
24
|
+
$ bundle install
|
25
|
+
```
|
26
|
+
|
27
|
+
Or install it yourself as:
|
28
|
+
|
29
|
+
```bash
|
30
|
+
$ gem install cv-parser
|
31
|
+
```
|
32
|
+
|
33
|
+
## Usage
|
34
|
+
|
35
|
+
### Using in Rails
|
36
|
+
|
37
|
+
You can use CV Parser directly in your Ruby or Rails application to extract structured data from CVs.
|
38
|
+
|
39
|
+
#### Basic Configuration
|
40
|
+
|
41
|
+
You can configure the gem for different providers:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
require 'cv_parser'
|
45
|
+
|
46
|
+
# OpenAI
|
47
|
+
CvParser.configure do |config|
|
48
|
+
config.provider = :openai
|
49
|
+
config.api_key = ENV['OPENAI_API_KEY']
|
50
|
+
config.model = 'gpt-4.1-mini'
|
51
|
+
config.output_schema = schema
|
52
|
+
end
|
53
|
+
|
54
|
+
# Anthropic
|
55
|
+
CvParser.configure do |config|
|
56
|
+
config.provider = :anthropic
|
57
|
+
config.api_key = ENV['ANTHROPIC_API_KEY']
|
58
|
+
config.model = 'claude-3-sonnet-20240229'
|
59
|
+
config.output_schema = schema
|
60
|
+
end
|
61
|
+
|
62
|
+
# Faker (for testing/development)
|
63
|
+
CvParser.configure do |config|
|
64
|
+
config.provider = :faker
|
65
|
+
config.output_schema = schema
|
66
|
+
end
|
67
|
+
```
|
68
|
+
|
69
|
+
#### Defining an Output Schema
|
70
|
+
|
71
|
+
Define the schema for the data you want to extract using JSON Schema format:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
schema = {
|
75
|
+
type: "json_schema",
|
76
|
+
name: "cv_parsing",
|
77
|
+
description: "Schema for a CV or resume document",
|
78
|
+
properties: {
|
79
|
+
personal_info: {
|
80
|
+
type: "object",
|
81
|
+
description: "Personal and contact information for the candidate",
|
82
|
+
properties: {
|
83
|
+
name: {
|
84
|
+
type: "string",
|
85
|
+
description: "Full name of the individual"
|
86
|
+
},
|
87
|
+
email: {
|
88
|
+
type: "string",
|
89
|
+
description: "Email address of the individual"
|
90
|
+
},
|
91
|
+
phone: {
|
92
|
+
type: "string",
|
93
|
+
description: "Phone number of the individual"
|
94
|
+
},
|
95
|
+
location: {
|
96
|
+
type: "string",
|
97
|
+
description: "Geographic location or city of residence"
|
98
|
+
}
|
99
|
+
},
|
100
|
+
required: %w[name email]
|
101
|
+
},
|
102
|
+
experience: {
|
103
|
+
type: "array",
|
104
|
+
description: "List of professional experience entries",
|
105
|
+
items: {
|
106
|
+
type: "object",
|
107
|
+
description: "A professional experience entry",
|
108
|
+
properties: {
|
109
|
+
company: {
|
110
|
+
type: "string",
|
111
|
+
description: "Name of the company or organization"
|
112
|
+
},
|
113
|
+
position: {
|
114
|
+
type: "string",
|
115
|
+
description: "Job title or position held"
|
116
|
+
},
|
117
|
+
start_date: {
|
118
|
+
type: "string",
|
119
|
+
description: "Start date of employment (e.g. '2020-01')"
|
120
|
+
},
|
121
|
+
end_date: {
|
122
|
+
type: "string",
|
123
|
+
description: "End date of employment or 'present'"
|
124
|
+
},
|
125
|
+
description: {
|
126
|
+
type: "string",
|
127
|
+
description: "Description of responsibilities and achievements"
|
128
|
+
}
|
129
|
+
},
|
130
|
+
required: %w[company position start_date]
|
131
|
+
}
|
132
|
+
},
|
133
|
+
education: {
|
134
|
+
type: "array",
|
135
|
+
description: "List of educational qualifications",
|
136
|
+
items: {
|
137
|
+
type: "object",
|
138
|
+
description: "An education entry",
|
139
|
+
properties: {
|
140
|
+
institution: {
|
141
|
+
type: "string",
|
142
|
+
description: "Name of the educational institution"
|
143
|
+
},
|
144
|
+
degree: {
|
145
|
+
type: "string",
|
146
|
+
description: "Degree or certification received"
|
147
|
+
},
|
148
|
+
field: {
|
149
|
+
type: "string",
|
150
|
+
description: "Field of study"
|
151
|
+
},
|
152
|
+
graduation_date: {
|
153
|
+
type: "string",
|
154
|
+
description: "Graduation date (e.g. '2019-06')"
|
155
|
+
}
|
156
|
+
},
|
157
|
+
required: %w[institution degree]
|
158
|
+
}
|
159
|
+
},
|
160
|
+
skills: {
|
161
|
+
type: "array",
|
162
|
+
description: "List of relevant skills",
|
163
|
+
items: {
|
164
|
+
type: "string",
|
165
|
+
description: "A single skill"
|
166
|
+
}
|
167
|
+
}
|
168
|
+
},
|
169
|
+
required: %w[personal_info experience education skills]
|
170
|
+
}
|
171
|
+
```
|
172
|
+
|
173
|
+
Set the output schema in the configuration block:
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
CvParser.configure do |config|
|
177
|
+
config.output_schema = schema
|
178
|
+
end
|
179
|
+
```
|
180
|
+
|
181
|
+
You can also pass the output schema directly to the `extract` method, which overrides the schema set in the configuration block:
|
182
|
+
|
183
|
+
```ruby
|
184
|
+
extractor = CvParser::Extractor.new
|
185
|
+
extractor.extract(
|
186
|
+
output_schema: schema
|
187
|
+
)
|
188
|
+
```
|
189
|
+
|
190
|
+
#### Extracting Data from a CV
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
extractor = CvParser::Extractor.new
|
194
|
+
result = extractor.extract(
|
195
|
+
file_path: "path/to/resume.pdf"
|
196
|
+
)
|
197
|
+
|
198
|
+
puts "Name: #{result['personal_info']['name']}"
|
199
|
+
puts "Email: #{result['personal_info']['email']}"
|
200
|
+
result['skills'].each { |skill| puts "- #{skill}" }
|
201
|
+
```
|
202
|
+
|
203
|
+
#### Error Handling
|
204
|
+
|
205
|
+
```ruby
|
206
|
+
begin
|
207
|
+
result = extractor.extract(
|
208
|
+
file_path: "path/to/resume.pdf"
|
209
|
+
)
|
210
|
+
rescue CvParser::FileNotFoundError, CvParser::FileNotReadableError => e
|
211
|
+
puts "File error: #{e.message}"
|
212
|
+
rescue CvParser::ParseError => e
|
213
|
+
puts "Error parsing the response: #{e.message}"
|
214
|
+
rescue CvParser::APIError => e
|
215
|
+
puts "LLM API error: #{e.message}"
|
216
|
+
rescue CvParser::ConfigurationError => e
|
217
|
+
puts "Configuration error: #{e.message}"
|
218
|
+
end
|
219
|
+
```
|
220
|
+
|
221
|
+
---
|
222
|
+
|
223
|
+
### Command-Line Interface
|
224
|
+
|
225
|
+
CV Parser also provides a CLI for quick analysis:
|
226
|
+
|
227
|
+
```bash
|
228
|
+
cv-parser path/to/resume.pdf
|
229
|
+
cv-parser --provider anthropic path/to/resume.pdf
|
230
|
+
cv-parser --format yaml --output result.yaml path/to/resume.pdf
|
231
|
+
cv-parser --schema custom-schema.json path/to/resume.pdf
|
232
|
+
cv-parser --help
|
233
|
+
```
|
234
|
+
|
235
|
+
You can use environment variables for API keys and provider selection:
|
236
|
+
|
237
|
+
```bash
|
238
|
+
export OPENAI_API_KEY=your-openai-key
|
239
|
+
export ANTHROPIC_API_KEY=your-anthropic-key
|
240
|
+
export CV_PARSER_PROVIDER=openai
|
241
|
+
export CV_PARSER_API_KEY=your-api-key
|
242
|
+
cv-parser resume.pdf
|
243
|
+
```
|
244
|
+
|
245
|
+
|
246
|
+
## Advanced Configuration
|
247
|
+
|
248
|
+
You can further customize CV Parser by setting advanced options in the configuration block. For example:
|
249
|
+
|
250
|
+
```ruby
|
251
|
+
CvParser.configure do |config|
|
252
|
+
# Configure OpenAI with organization ID
|
253
|
+
config.provider = :openai
|
254
|
+
config.api_key = ENV['OPENAI_API_KEY']
|
255
|
+
config.model = 'gpt-4.1-mini'
|
256
|
+
|
257
|
+
# Set timeout for file uploads (important for larger files)
|
258
|
+
config.timeout = 120 # TODO - not yet implemented
|
259
|
+
config.max_retries = 2 # TODO - not yet implemented
|
260
|
+
|
261
|
+
# Provider-specific options
|
262
|
+
config.provider_options[:organization_id] = ENV['OPENAI_ORG_ID']
|
263
|
+
|
264
|
+
# You can also set custom prompts for the LLM:
|
265
|
+
config.prompt = "Extract the following fields from the CV..."
|
266
|
+
config.system_prompt = "You are a CV parsing assistant."
|
267
|
+
|
268
|
+
# Set the output schema (JSON Schema format)
|
269
|
+
config.output_schema = schema
|
270
|
+
|
271
|
+
# Set the max tokens and temperature
|
272
|
+
config.max_tokens = 4000
|
273
|
+
config.temperature = 0.1
|
274
|
+
end
|
275
|
+
```
|
276
|
+
|
277
|
+
### Testing and Development
|
278
|
+
|
279
|
+
#### Using the Faker Provider
|
280
|
+
|
281
|
+
The Faker provider generates realistic-looking fake data based on your schema without making API calls. This is useful for:
|
282
|
+
- Writing tests (RSpec, Rails, etc.)
|
283
|
+
- Developing UI components
|
284
|
+
- Demonstrating functionality without API keys
|
285
|
+
- Avoiding API costs and rate limits
|
286
|
+
- Running tests faster, without external API calls
|
287
|
+
- Getting consistent, predictable results
|
288
|
+
- Removing the need for API keys in CI/CD environments
|
289
|
+
|
290
|
+
#### Basic Test Setup
|
291
|
+
|
292
|
+
Here's how to use the faker provider in your RSpec tests:
|
293
|
+
|
294
|
+
```ruby
|
295
|
+
# spec/your_resume_processor_spec.rb
|
296
|
+
require 'spec_helper'
|
297
|
+
|
298
|
+
RSpec.describe YourResumeProcessor do
|
299
|
+
# Define a JSON Schema format schema for testing
|
300
|
+
let(:test_schema) do
|
301
|
+
{
|
302
|
+
type: "json_schema",
|
303
|
+
name: "cv_parsing_test",
|
304
|
+
description: "Test schema for CV parsing",
|
305
|
+
properties: {
|
306
|
+
personal_info: {
|
307
|
+
type: "object",
|
308
|
+
description: "Personal information",
|
309
|
+
properties: {
|
310
|
+
name: {
|
311
|
+
type: "string",
|
312
|
+
description: "Full name"
|
313
|
+
},
|
314
|
+
email: {
|
315
|
+
type: "string",
|
316
|
+
description: "Email address"
|
317
|
+
}
|
318
|
+
},
|
319
|
+
required: %w[name email]
|
320
|
+
},
|
321
|
+
skills: {
|
322
|
+
type: "array",
|
323
|
+
description: "List of skills",
|
324
|
+
items: {
|
325
|
+
type: "string",
|
326
|
+
description: "A skill"
|
327
|
+
}
|
328
|
+
}
|
329
|
+
},
|
330
|
+
required: %w[personal_info skills]
|
331
|
+
}
|
332
|
+
end
|
333
|
+
|
334
|
+
before do
|
335
|
+
# Configure CV Parser to use the faker provider
|
336
|
+
CvParser.configure do |config|
|
337
|
+
config.provider = :faker
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
341
|
+
after do
|
342
|
+
# Reset configuration after tests
|
343
|
+
CvParser.reset
|
344
|
+
end
|
345
|
+
|
346
|
+
it "processes a resume and extracts relevant fields" do
|
347
|
+
processor = YourResumeProcessor.new
|
348
|
+
result = processor.process_resume("spec/fixtures/sample_resume.pdf", test_schema)
|
349
|
+
|
350
|
+
# The faker provider will return consistent test data
|
351
|
+
expect(result.personal_info.name).to eq("John Doe")
|
352
|
+
expect(result.personal_info.email).to eq("john.doe@example.com")
|
353
|
+
expect(result.skills).to be_an(Array)
|
354
|
+
expect(result.skills).not_to be_empty
|
355
|
+
end
|
356
|
+
end
|
357
|
+
```
|
358
|
+
|
359
|
+
#### Simple Faker Example
|
360
|
+
|
361
|
+
```ruby
|
362
|
+
# Configure with Faker provider
|
363
|
+
CvParser.configure do |config|
|
364
|
+
config.provider = :faker
|
365
|
+
end
|
366
|
+
|
367
|
+
# Use the extractor as normal
|
368
|
+
extractor = CvParser::Extractor.new
|
369
|
+
result = extractor.extract(
|
370
|
+
file_path: "path/to/resume.pdf", # Path will be ignored by faker
|
371
|
+
output_schema: schema # Using the JSON Schema format defined above
|
372
|
+
)
|
373
|
+
|
374
|
+
# Faker will generate structured data based on your schema
|
375
|
+
puts result.inspect
|
376
|
+
```
|
377
|
+
|
378
|
+
#### Data Generation Behavior
|
379
|
+
|
380
|
+
The faker provider generates realistic-looking data based on your schema. The data is deterministic for fields like name, email, and phone, but randomized for arrays and collections. You can write tests that check for structure without relying on specific content for variable fields.
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "cv_parser"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
require "irb"
|
11
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/cv-parser
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "optparse"
|
4
|
+
require "json"
|
5
|
+
require_relative "../cv_parser"
|
6
|
+
|
7
|
+
module CvParser
  # Command-line front end for the gem.
  #
  # Parses the command-line flags, configures CvParser (provider and API key),
  # runs an Extractor over the given file and prints the result, or writes it
  # to a file, as JSON or YAML.
  class CLI
    # Schema used when no --schema file is supplied.
    DEFAULT_SCHEMA = {
      type: "json_schema",
      properties: {
        contact_information: {
          type: "object",
          properties: {
            name: { type: "string" },
            email: { type: "string" },
            phone: { type: "string" },
            location: { type: "string" },
            linkedin: { type: "string" }
          }
        },
        education: {
          type: "array",
          items: {
            type: "object",
            properties: {
              institution: { type: "string" },
              degree: { type: "string" },
              field_of_study: { type: "string" },
              dates: { type: "string" },
              achievements: {
                type: "array",
                items: { type: "string" }
              }
            }
          }
        },
        work_experience: {
          type: "array",
          items: {
            type: "object",
            properties: {
              company: { type: "string" },
              position: { type: "string" },
              dates: { type: "string" },
              responsibilities: {
                type: "array",
                items: { type: "string" }
              },
              achievements: {
                type: "array",
                items: { type: "string" }
              }
            }
          }
        },
        skills: {
          type: "array",
          items: { type: "string" }
        },
        languages: {
          type: "array",
          items: { type: "string" }
        },
        certifications: {
          type: "array",
          items: { type: "string" }
        }
      }
    }.freeze

    # Providers this CLI knows how to configure.
    SUPPORTED_PROVIDERS = %i[openai anthropic faker].freeze

    # Provider used when neither --provider nor CV_PARSER_PROVIDER is given.
    DEFAULT_PROVIDER = :openai

    # Values accepted by --format.
    OUTPUT_FORMATS = %w[json yaml].freeze

    def initialize
      @options = {
        provider: nil,
        api_key: nil,
        output_format: "json",
        output_file: nil,
        schema_file: nil
      }
    end

    # Entry point of the command-line tool.
    #
    # @param args [Array<String>] command-line arguments; option flags are
    #   removed from this array in place while parsing
    # @return [void]
    # @raise [SystemExit] via +exit+: status 1 on any reported error, status 0
    #   after --help / --version
    def run(args = ARGV)
      begin
        parse_options(args)
      rescue OptionParser::ParseError => e
        # Unknown flag, missing or invalid argument: report it like every other
        # CLI error instead of letting the exception escape with a backtrace.
        puts "Error: #{e.message}"
        puts @parser
        exit 1
      end

      # --help / --version need no input file, so this check has to come
      # before the "no input file" error below.
      return if @options[:help] || @options[:version]

      if args.empty?
        puts "Error: No input file specified"
        puts @parser
        exit 1
      end

      input_file = args[0]

      # Check if file exists
      if !input_file || !File.exist?(input_file)
        puts "Error: Input file '#{input_file}' not found"
        exit 1
      end

      configure_parser

      begin
        output_schema = load_schema
        result = extract_data(input_file, output_schema)
        output_result(result)
      rescue CvParser::Error => e
        puts "Error: #{e.message}"
        exit 1
      end
    end

    private

    # Builds the OptionParser (kept in @parser so +run+ can print the usage
    # text) and consumes the recognised flags from +args+.
    #
    # @param args [Array<String>] arguments to parse; mutated by +parse!+
    # @raise [OptionParser::ParseError] on unknown flags or invalid values
    def parse_options(args)
      @parser = OptionParser.new do |opts|
        opts.banner = "Usage: cv-parser [options] <file>"

        opts.on("-p", "--provider PROVIDER", "LLM Provider (openai, anthropic, or faker)") do |provider|
          # Validated later in configure_parser, together with the value that
          # may come from CV_PARSER_PROVIDER.
          @options[:provider] = provider.to_sym
        end

        opts.on("-k", "--api-key API_KEY", "API key for the LLM provider") do |key|
          @options[:api_key] = key
        end

        opts.on("-f", "--format FORMAT", "Output format (json or yaml)") do |format|
          normalized = format.downcase
          # Reject unknown formats instead of silently producing JSON.
          raise OptionParser::InvalidArgument, format unless OUTPUT_FORMATS.include?(normalized)

          @options[:output_format] = normalized
        end

        opts.on("-o", "--output FILE", "Write output to file") do |file|
          @options[:output_file] = file
        end

        opts.on("-s", "--schema FILE", "Custom schema file (JSON)") do |file|
          @options[:schema_file] = file
        end

        opts.on("-h", "--help", "Show this help message") do
          puts opts
          @options[:help] = true
          exit
        end

        opts.on("-v", "--version", "Show version") do
          puts "CV Parser v#{CvParser::VERSION}"
          @options[:version] = true
          exit
        end
      end

      @parser.parse!(args)
    end

    # Applies the provider and API key to the global CvParser configuration.
    #
    # Precedence for the provider: --provider, a provider already present in
    # the configuration, CV_PARSER_PROVIDER, then DEFAULT_PROVIDER. A provider
    # outside SUPPORTED_PROVIDERS is reported as an error rather than being
    # silently replaced by the default.
    #
    # Precedence for the API key: --api-key, a key already present in the
    # configuration, CV_PARSER_API_KEY, then the provider-specific variable.
    def configure_parser
      CvParser.configure do |config|
        config.provider = @options[:provider] if @options[:provider]
        config.api_key = @options[:api_key] if @options[:api_key]

        # Try environment variables if not provided via options
        config.provider ||= provider_from_env || DEFAULT_PROVIDER

        # Configure based on provider
        case config.provider
        when :openai
          config.api_key ||= ENV["CV_PARSER_API_KEY"] || ENV.fetch("OPENAI_API_KEY", nil)
        when :anthropic
          config.api_key ||= ENV["CV_PARSER_API_KEY"] || ENV.fetch("ANTHROPIC_API_KEY", nil)
        when :faker
          config.api_key ||= "fake-api-key"
        else
          puts "Error: Unknown provider '#{config.provider}' " \
               "(expected one of: #{SUPPORTED_PROVIDERS.join(', ')})"
          exit 1
        end
      end
    end

    # Reads the provider name from CV_PARSER_PROVIDER.
    #
    # @return [Symbol, nil] the provider, or nil when the variable is unset or
    #   blank
    def provider_from_env
      name = ENV["CV_PARSER_PROVIDER"].to_s.strip
      name.empty? ? nil : name.to_sym
    end

    # Returns the schema to extract with: the parsed --schema file when one was
    # given, DEFAULT_SCHEMA otherwise. Prints an error and exits with status 1
    # when the file is missing or is not valid JSON.
    #
    # NOTE(review): a schema loaded from JSON has String keys, whereas
    # DEFAULT_SCHEMA uses Symbol keys -- confirm the providers accept both.
    def load_schema
      if @options[:schema_file]
        unless File.exist?(@options[:schema_file])
          puts "Error: Schema file '#{@options[:schema_file]}' not found"
          exit 1
        end

        begin
          JSON.parse(File.read(@options[:schema_file]))
        rescue JSON::ParserError => e
          puts "Error: Invalid JSON schema file: #{e.message}"
          exit 1
        end
      else
        DEFAULT_SCHEMA
      end
    end

    # Prints a short progress header and runs the extraction.
    #
    # @param input_file [String] path of the CV to parse
    # @param output_schema [Hash] schema handed to the extractor
    # @return whatever CvParser::Extractor#extract returns
    def extract_data(input_file, output_schema)
      puts "Parsing CV: #{input_file}"
      puts "Using provider: #{CvParser.configuration.provider}"

      extractor = CvParser::Extractor.new
      extractor.extract(file_path: input_file, output_schema: output_schema)
    end

    # Serialises +result+ in the selected format and either writes it to the
    # --output file or prints it to standard output.
    def output_result(result)
      formatted_output = case @options[:output_format]
                         when "yaml"
                           # Loaded lazily: only needed for YAML output.
                           require "yaml"
                           result.to_yaml
                         else
                           JSON.pretty_generate(result)
                         end

      if @options[:output_file]
        File.write(@options[:output_file], formatted_output)
        puts "Output written to: #{@options[:output_file]}"
      else
        puts "\nResults:"
        puts formatted_output
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CvParser
  # Plain settings holder for the gem. Every setting except +provider_options+
  # can be reassigned; +provider_options+ exposes a Hash that is filled in
  # place (e.g. config.provider_options[:organization_id] = "...").
  class Configuration
    # Provider selection and credentials.
    attr_accessor :provider, :model, :api_key

    # Request limits. NOTE(review): the README marks timeout and max_retries
    # as "not yet implemented" -- confirm before relying on them.
    attr_accessor :timeout, :max_retries

    # Prompting and the shape of the extracted output.
    attr_accessor :prompt, :system_prompt, :output_schema

    # Generation parameters.
    attr_accessor :max_tokens, :temperature

    # Hash of provider-specific options; no writer is defined for it.
    attr_reader :provider_options

    # Starts with nothing selected (provider, model, key, prompts and schema
    # are nil) and numeric defaults of 60 / 3 / 4000 / 0.1 for timeout,
    # max_retries, max_tokens and temperature.
    def initialize
      @provider, @model, @api_key = nil, nil, nil
      @timeout, @max_retries = 60, 3
      @provider_options = {}
      @prompt, @system_prompt, @output_schema = nil, nil, nil
      @max_tokens, @temperature = 4000, 0.1
    end
  end
end
|