udise_school_report_reader 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +20 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/lib/udise_school_report_reader/activities_data_reader.rb +58 -0
- data/lib/udise_school_report_reader/anganwadi_data_reader.rb +22 -0
- data/lib/udise_school_report_reader/basic_info_data_reader.rb +29 -0
- data/lib/udise_school_report_reader/block_rectangle_combiner.rb +115 -0
- data/lib/udise_school_report_reader/building_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/characteristics_reader.rb +28 -0
- data/lib/udise_school_report_reader/csv_writer.rb +75 -0
- data/lib/udise_school_report_reader/data_reader_base.rb +86 -0
- data/lib/udise_school_report_reader/digital_facilities_data_reader.rb +42 -0
- data/lib/udise_school_report_reader/enrollment_data_reader.rb +136 -0
- data/lib/udise_school_report_reader/enrollment_html_writer.rb +81 -0
- data/lib/udise_school_report_reader/enrollment_yaml_writer.rb +62 -0
- data/lib/udise_school_report_reader/ews_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/ews_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/ews_yaml_writer.rb +31 -0
- data/lib/udise_school_report_reader/location_data_reader.rb +47 -0
- data/lib/udise_school_report_reader/official_data_reader.rb +40 -0
- data/lib/udise_school_report_reader/pdf_block_extractor.rb +49 -0
- data/lib/udise_school_report_reader/pdf_content_compressor.rb +36 -0
- data/lib/udise_school_report_reader/pdf_rectangle_extractor.rb +53 -0
- data/lib/udise_school_report_reader/rooms_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/rte_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/rte_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/rte_yaml_writer.rb +61 -0
- data/lib/udise_school_report_reader/sanitation_data_reader.rb +56 -0
- data/lib/udise_school_report_reader/school_report_parser.rb +295 -0
- data/lib/udise_school_report_reader/teacher_data_reader.rb +204 -0
- data/lib/udise_school_report_reader/version.rb +3 -0
- data/lib/udise_school_report_reader.rb +41 -0
- data/test/school_report_parser_test.rb +62 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: eb4e7f18e92a981045b328698bf04015f68aee7f22c9bc1a19b8ceab7c416eca
|
4
|
+
data.tar.gz: c0e734f6c22a5ad1ce6af4b2ee094ca140540500ebe75ba1594c6f1d6eb528bd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c453df46b297daef2ed774d838289e9ea41588d48f07db24964ca6fcd6cd918c8a99f789c7a89cb14bc96b8769091a1e2cb36183b90615a30cb814d7f16d586a
|
7
|
+
data.tar.gz: b9b2b0690d6f0d1a65f0f36e2d8a0a707860653332066f8591b5cdce573a5d5429f3971e57b7d889665398640282abfc6b543b335394a0d0d8e3faff7c740a19
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## [0.1.0] - 2024-01-01
|
4
|
+
|
5
|
+
### Added
|
6
|
+
- Initial release
|
7
|
+
- Basic PDF parsing functionality
|
8
|
+
- Data readers for various sections:
|
9
|
+
- Basic school information
|
10
|
+
- Teacher details
|
11
|
+
- Room information
|
12
|
+
- Location data
|
13
|
+
- Building information
|
14
|
+
- Sanitation facilities
|
15
|
+
- Digital facilities
|
16
|
+
- RTE compliance
|
17
|
+
- EWS details
|
18
|
+
- Enrollment data
|
19
|
+
- HTML and YAML writers for data export
|
20
|
+
- CSV export functionality
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 UDISE Plus
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# UDISE School Report Reader
|
2
|
+
|
3
|
+
A Ruby gem for parsing and extracting data from UDISE (Unified District Information System for Education) school reports.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'udise_school_report_reader'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle install
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install udise_school_report_reader
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'udise_school_report_reader'
|
25
|
+
|
26
|
+
# Initialize the parser with a PDF file
|
27
|
+
parser = UdiseSchoolReportReader::SchoolReportParser.new('path/to/report.pdf')
```
|
28
|
+
|
29
|
+
## Features
|
30
|
+
|
31
|
+
- Parse UDISE school reports in PDF format
|
32
|
+
|
33
|
+
## Development
|
34
|
+
|
35
|
+
After checking out the repo, run `bundle install` to install dependencies. Then, run `rake test` to run the tests.
|
36
|
+
|
37
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
38
|
+
|
39
|
+
## Contributing
|
40
|
+
|
41
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/UDISE-Plus/udise-school-report-reader.
|
42
|
+
|
43
|
+
## License
|
44
|
+
|
45
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Extracts the "activities" figures from the text lines of a school report:
# inspection counts, instructional days and average school hours.
class ActivitiesDataReader
  # Order of the four values that follow a per-level label line.
  LEVELS = %w[primary upper_primary secondary higher_secondary].freeze

  # Label line => key stored under ['activities']['inspections'].
  INSPECTIONS = {
    'Acad. Inspections' => 'academic',
    'CRC Coordinator' => 'crc_coordinator',
    'Block Level Officers' => 'block_level',
    'State/District Officers' => 'state_district'
  }.freeze

  WHOLE_NUMBER = /\A\d+\z/.freeze
  DECIMAL = /\A\d+\.?\d*\z/.freeze

  # Label line => [section key, pattern a value must match, conversion method].
  LEVEL_TABLES = {
    'Instructional days' => ['instructional_days', WHOLE_NUMBER, :to_i],
    'Avg.School hrs.Std.' => ['student_hours', DECIMAL, :to_f],
    'Avg.School hrs.Tch.' => ['teacher_hours', DECIMAL, :to_f]
  }.freeze

  private_constant :LEVELS, :INSPECTIONS, :WHOLE_NUMBER, :DECIMAL, :LEVEL_TABLES

  # Scans +lines+ for known labels and reads the value(s) on the following line(s).
  #
  # @param lines [Array<String>] text lines of the report, in reading order
  # @return [Hash] { 'activities' => { section => { key => number } } } containing
  #   only the sections that received at least one value; {} when nothing matched
  def self.read(lines)
    activities = {
      'inspections' => {},
      'instructional_days' => {},
      'student_hours' => {},
      'teacher_hours' => {}
    }

    lines.each_with_index do |line, i|
      label = line.to_s.strip

      if INSPECTIONS.key?(label)
        count = lines[i + 1].to_s.strip
        activities['inspections'][INSPECTIONS[label]] = count.to_i if count.match?(WHOLE_NUMBER)
      elsif LEVEL_TABLES.key?(label)
        section, pattern, converter = LEVEL_TABLES[label]
        read_levels(activities[section], lines[i + 1, LEVELS.size], pattern, converter)
      end
    end

    activities.reject! { |_, values| values.empty? }
    activities.empty? ? {} : { 'activities' => activities }
  end

  # Stores up to four per-level values from +cells+ into +target+.
  # Values are stripped before matching so padded cells are not dropped.
  def self.read_levels(target, cells, pattern, converter)
    values = Array(cells).map { |cell| cell.to_s.strip }
    # The whole row is ignored unless its first cell is numeric.
    return unless values.first.to_s.match?(pattern)

    LEVELS.zip(values) do |level, value|
      target[level] = value.public_send(converter) if value.to_s.match?(pattern)
    end
  end
  private_class_method :read_levels
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Field map for the Anganwadi section of the report.
# Each key is a label line; the reader mixed in from DataReaderBase stores the
# line that follows the label at +key_path+, converting it when +value_type+ is set.
class AnganwadiDataReader
  include DataReaderBase

  FIELD_MAPPINGS = {
    'Anganwadi At Premises' => { key_path: %w[anganwadi at_premises] },
    'Anganwadi Boys' => { key_path: %w[anganwadi boys], value_type: :integer },
    'Anganwadi Girls' => { key_path: %w[anganwadi girls], value_type: :integer },
    'Anganwadi Worker' => { key_path: %w[anganwadi worker] }
  }
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'yaml'

# Reads the header fields of a school report: UDISE code, school name and
# academic year.
class BasicInfoDataReader
  # Eleven digits, optionally separated by whitespace into 2-2-2-2-3 groups.
  UDISE_CODE = /(\d{2})\s*(\d{2})\s*(\d{2})\s*(\d{2})\s*(\d{3})/.freeze
  ACADEMIC_YEAR = /Academic Year.*:\s*(\d{4}-\d{2})/.freeze

  private_constant :UDISE_CODE, :ACADEMIC_YEAR

  # @param lines [Array<String>] text lines of the report, in reading order
  # @return [Hash] { 'basic_info' => { ... } } with nil / empty-hash entries removed
  # @note 'template.yml' is resolved against the current working directory.
  def self.read(lines)
    template = YAML.load_file('template.yml')
    # Tolerate a template that is empty or has no 'basic_info' section.
    info = (template.is_a?(Hash) && template['basic_info']) || {}

    lines.each_with_index do |line, i|
      next_line = lines[i + 1]&.strip

      case line
      when 'UDISE CODE'
        match = next_line&.match(UDISE_CODE)
        info['udise_code'] = match.captures.join if match
      when 'School Name'
        # Accept any non-blank name rather than one hard-coded sample school.
        info['name'] = next_line unless next_line.nil? || next_line.empty?
      when ACADEMIC_YEAR
        info['academic_year'] = Regexp.last_match(1)
      end
    end

    info.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) }
    { 'basic_info' => info }
  end
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Pairs extracted text blocks with the rectangles drawn around them.
class BlockRectangleCombiner
  # Joins each text block with the smallest rectangle (by area) on the same page
  # that contains the block's (x, y) point, and appends rectangles that contain
  # no text block as entries with empty text.
  #
  # Blocks without x/y and rectangles without positive width/height are skipped;
  # a warning is printed for skipped rectangles and for skipped blocks that have text.
  #
  # @param blocks [Array<Hash>] hashes read via :page, :x, :y, :text, :font, :font_size
  # @param rects [Array<Hash>] hashes read via :page, :x, :y, :width, :height,
  #   :stroke_color, :fill_color, :line_width
  # @return [Array<Hash>] combined entries sorted by page, then descending y, then x
  def self.combine(blocks, rects)
    valid_blocks, coordless_blocks = blocks.partition { |block| block[:x] && block[:y] }
    report_blocks_without_coordinates(coordless_blocks.reject { |block| block[:text].to_s.empty? })

    valid_rects, invalid_rects = rects.partition { |rect| usable_rect?(rect) }
    report_invalid_rects(invalid_rects)

    # Rectangles found to contain at least one block, tracked by object identity
    # so the "empty rectangle" step needs no second containment scan.
    occupied = {}.compare_by_identity

    combined_data = valid_blocks.map do |block|
      containing = valid_rects.select { |rect| contains?(rect, block) }
      containing.each { |rect| occupied[rect] = true }
      build_entry(block, containing.min_by { |rect| rect[:width] * rect[:height] })
    end

    valid_rects.each do |rect|
      combined_data << build_entry(nil, rect) unless occupied.key?(rect)
    end

    combined_data.sort_by! do |entry|
      [
        entry[:page],
        -(entry[:rect_y] || entry[:text_y] || 0), # negated so larger y sorts first
        entry[:rect_x] || entry[:text_x] || 0
      ]
    end

    combined_data
  end

  # True when the rectangle has all four coordinates and a positive size.
  def self.usable_rect?(rect)
    rect[:x] && rect[:y] && rect[:width] && rect[:height] && rect[:width] > 0 && rect[:height] > 0
  end

  # True when the block's point lies on the rectangle's page and within its bounds (inclusive).
  def self.contains?(rect, block)
    rect[:page] == block[:page] &&
      block[:x] >= rect[:x] &&
      block[:x] <= (rect[:x] + rect[:width]) &&
      block[:y] >= rect[:y] &&
      block[:y] <= (rect[:y] + rect[:height])
  end

  # Builds one output row; either argument may be nil, leaving its half of the row nil
  # (text is "" when there is no block).
  def self.build_entry(block, rect)
    {
      page: (block || rect)[:page],
      text: block ? block[:text] : '',
      text_x: block && block[:x],
      text_y: block && block[:y],
      font: block && block[:font],
      font_size: block && block[:font_size],
      rect_x: rect && rect[:x],
      rect_y: rect && rect[:y],
      rect_width: rect && rect[:width],
      rect_height: rect && rect[:height],
      stroke_color: rect && rect[:stroke_color],
      fill_color: rect && rect[:fill_color],
      line_width: rect && rect[:line_width]
    }
  end

  # Warns about text blocks that cannot be placed because x or y is missing.
  def self.report_blocks_without_coordinates(invalid_blocks)
    return unless invalid_blocks.any?

    warn "Warning: Found #{invalid_blocks.size} non-empty blocks with missing coordinates"
    invalid_blocks.each do |block|
      warn "  - Page #{block[:page]}: '#{block[:text]}'"
    end
  end

  # Warns about rectangles skipped for missing or non-positive geometry.
  def self.report_invalid_rects(invalid_rects)
    return unless invalid_rects.any?

    warn "Warning: Found #{invalid_rects.size} rectangles with invalid coordinates"
    invalid_rects.each do |rect|
      warn "  - Page #{rect[:page]}: x=#{rect[:x]}, y=#{rect[:y]}, w=#{rect[:width]}, h=#{rect[:height]}"
    end
  end

  private_class_method :usable_rect?, :contains?, :build_entry,
                       :report_blocks_without_coordinates, :report_invalid_rects
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Field map for the building section of the report.
# Each key is a label line; the reader mixed in from DataReaderBase stores the
# line that follows the label at +key_path+. When that following line matches
# +end_pattern+ the field is skipped; +value_type: :integer+ converts digit-only values.
class BuildingDataReader
  include DataReaderBase

  FIELD_MAPPINGS = {
    'Building Status' => { key_path: %w[building status], end_pattern: /Boundary/ },
    'Boundary wall' => { key_path: %w[building boundary_wall], end_pattern: /No\.of/ },
    'No.of Building Blocks' => { key_path: %w[building blocks], value_type: :integer },
    'Pucca Building Blocks' => { key_path: %w[building pucca_blocks], value_type: :integer },
    'Is this a Shift School?' => { key_path: %w[building has_shifts], end_pattern: /Building/ },
    'Availability of Ramps' => {
      key_path: %w[building accessibility ramps],
      end_pattern: /Availability of Hand/
    },
    'Availability of Handrails' => {
      key_path: %w[building accessibility handrails],
      end_pattern: /Anganwadi/
    }
  }
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Field map for the school-characteristics section of the report.
# Each key is a label line; the reader mixed in from DataReaderBase stores the
# line that follows the label at +key_path+, unless that line matches +end_pattern+.
class CharacteristicsReader
  include DataReaderBase

  FIELD_MAPPINGS = {
    'School Category' => {
      key_path: ['characteristics', 'category'],
      end_pattern: /Management/
    },
    'School Type' => {
      key_path: ['characteristics', 'type'],
      end_pattern: /Lowest/
    },
    'Lowest & Highest Class' => {
      key_path: ['characteristics', 'class_range'],
      end_pattern: /Pre Primary/
    },
    'Pre Primary' => {
      key_path: ['characteristics', 'pre_primary'],
      end_pattern: /Medium/
    },
    'Is Special School for CWSN?' => {
      key_path: ['characteristics', 'is_special_school'],
      end_pattern: /Availability/
    }
  }

  # Pin the template/result key explicitly. This class name does not end in
  # "DataReader", so a key derived from the class name is not guaranteed to be
  # 'characteristics', which is the root every key_path above writes to.
  def self.base_key
    'characteristics'
  end
  private_class_method :base_key
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Writes extracted PDF data (text blocks, rectangles, or both combined) as CSV files.
class CSVWriter
  # Each list is both the header row and the keys read from every record,
  # so headers and values cannot drift apart.
  BLOCK_COLUMNS = %i[page x y text font font_size].freeze
  RECTANGLE_COLUMNS = %i[page x y width height stroke_color fill_color line_width].freeze
  COMBINED_COLUMNS = %i[
    page text text_x text_y font font_size
    rect_x rect_y rect_width rect_height stroke_color fill_color line_width
  ].freeze

  private_constant :BLOCK_COLUMNS, :RECTANGLE_COLUMNS, :COMBINED_COLUMNS

  # @param blocks [Array<Hash>] text blocks keyed by the symbols in BLOCK_COLUMNS
  # @param csv_path [String] file to create or overwrite
  def self.write_blocks(blocks, csv_path)
    write_rows(csv_path, BLOCK_COLUMNS, blocks)
  end

  # @param rectangles [Array<Hash>] rectangles keyed by the symbols in RECTANGLE_COLUMNS
  # @param csv_path [String] file to create or overwrite
  def self.write_rectangles(rectangles, csv_path)
    write_rows(csv_path, RECTANGLE_COLUMNS, rectangles)
  end

  # @param combined_data [Array<Hash>] entries keyed by the symbols in COMBINED_COLUMNS
  # @param output_path [String] file to create or overwrite
  def self.write_combined(combined_data, output_path)
    write_rows(output_path, COMBINED_COLUMNS, combined_data)
  end

  # Writes a header row followed by one row per record; missing keys become empty cells.
  def self.write_rows(path, columns, records)
    CSV.open(path, 'wb') do |csv|
      csv << columns.map(&:to_s)
      records.each { |record| csv << columns.map { |column| record[column] } }
    end
  end
  private_class_method :write_rows
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'yaml'

# Mixin for declarative section readers. An including class defines a
# FIELD_MAPPINGS constant (label line => mapping hash) and gains a class-level
# +read+ that fills the matching section of 'template.yml' from report lines.
module DataReaderBase
  def self.included(base)
    base.extend(ClassMethods)
  end

  module ClassMethods
    # @param lines [Array<String>] text lines of the report, in reading order
    # @return [Hash] { base_key => section } plus anything written via key_path,
    #   with nil / empty-hash entries removed from the base section
    # @note 'template.yml' is resolved against the current working directory.
    def read(lines)
      template = YAML.load_file('template.yml')
      # An empty or non-mapping template yields no defaults instead of raising.
      data = { base_key => (template[base_key] if template.is_a?(Hash)) }

      lines.each_with_index do |line, i|
        mapping = self::FIELD_MAPPINGS[line]
        next unless mapping

        if mapping[:is_table]
          # Table sections are searched within the 20 lines after the label.
          process_table_data(data, lines[(i + 1)..(i + 20)], mapping)
        else
          process_simple_field(data, lines[i + 1]&.strip, mapping)
        end
      end

      cleanup_data(data)
      data
    end

    private

    # For each configured section, finds its trigger line and reads the
    # digit-only values located +offset+ lines after it, one per field.
    def process_table_data(data, table_lines, mapping)
      mapping[:table_config][:sections].each do |section|
        idx = table_lines.index { |l| l.match?(section[:trigger]) }
        next unless idx && idx + section[:offset] + section[:fields].length <= table_lines.length

        section[:fields].each_with_index do |field, field_idx|
          value = table_lines[idx + section[:offset] + field_idx]
          next unless value =~ /^\d+$/

          set_value_at_path(data, mapping[:key_path] + field[:key], transform_value(value, field[:value_type]))
        end
      end
    end

    # Stores the line after a label, unless it is missing or is already the
    # next label (signalled by +end_pattern+).
    def process_simple_field(data, next_line, mapping)
      return unless next_line
      return if mapping[:end_pattern] && next_line.match?(mapping[:end_pattern])

      set_value_at_path(data, mapping[:key_path], transform_value(next_line, mapping[:value_type]))
    end

    # :integer converts digit-only strings and yields nil otherwise; any other
    # type returns the value unchanged.
    def transform_value(value, type)
      case type
      when :integer
        value.to_i if value =~ /^\d+$/
      else
        value
      end
    end

    # Writes +value+ at the nested +path+, creating intermediate hashes as needed.
    def set_value_at_path(data, path, value)
      *parents, leaf = path
      target = parents.reduce(data) { |node, key| node[key] ||= {} }
      target[leaf] = value
    end

    # Drops nil and empty-hash entries from the base section and its direct sub-hashes.
    def cleanup_data(data)
      base = data[base_key]
      return unless base.is_a?(Hash)

      blank = ->(v) { v.nil? || (v.is_a?(Hash) && v.empty?) }
      base.each_value do |section|
        section.reject! { |_, v| blank.call(v) } if section.is_a?(Hash)
      end
      base.reject! { |_, v| blank.call(v) }
    end

    # Section key derived from the class name: a trailing "DataReader" or plain
    # "Reader" suffix is removed and the rest lower-cased
    # (BuildingDataReader -> "building", CharacteristicsReader -> "characteristics").
    def base_key
      name.sub(/(?:Data)?Reader\z/, '').downcase
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'yaml'

# Reads the digital-facilities block of a school report. Values are taken from
# fixed line offsets after any line containing "Digital".
class DigitalFacilitiesDataReader
  CODE = /^[12]-/.freeze  # coded answer beginning with "1-" or "2-", stored as text
  COUNT = /^\d+$/.freeze  # digit-only value, stored as Integer

  # [line offset from the "Digital" line, key path under 'digital_facilities', pattern]
  FIELDS = [
    [2, %w[ict_lab], CODE],
    [4, %w[internet], CODE],
    [6, %w[computers desktop], COUNT],
    [8, %w[computers laptop], COUNT],
    [10, %w[computers tablet], COUNT],
    [12, %w[peripherals printer], COUNT],
    [14, %w[peripherals projector], COUNT],
    [16, %w[smart_classroom dth], CODE],
    [18, %w[smart_classroom digiboard], COUNT]
  ].freeze

  private_constant :CODE, :COUNT, :FIELDS

  # @param lines [Array<String>] text lines of the report, in reading order
  # @return [Hash, nil] { 'digital_facilities' => { ... } } with nil / empty-hash
  #   entries removed, or nil when nothing remains
  # @note 'template.yml' is resolved against the current working directory.
  def self.read(lines)
    template = YAML.load_file('template.yml')
    # Tolerate a template that is empty or lacks the section.
    facilities = (template.is_a?(Hash) && template['digital_facilities']) || {}

    lines.each_with_index do |line, i|
      next unless line.is_a?(String) && line.match?(/Digital/)

      FIELDS.each do |offset, path, pattern|
        raw = lines[i + offset]
        next unless raw.is_a?(String) && raw.match?(pattern)

        store(facilities, path, pattern.equal?(COUNT) ? raw.to_i : raw)
      end
    end

    facilities.each_value { |section| prune!(section) if section.is_a?(Hash) }
    prune!(facilities)

    { 'digital_facilities' => facilities } unless facilities.empty?
  end

  # Writes +value+ at +path+, creating nested hashes the template did not provide.
  def self.store(facilities, path, value)
    *parents, leaf = path
    target = parents.reduce(facilities) { |node, key| node[key] ||= {} }
    target[leaf] = value
  end

  # Removes nil and empty-hash entries in place.
  def self.prune!(hash)
    hash.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) }
  end

  private_class_method :store, :prune!
end
|