udise_school_report_reader 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +20 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/lib/udise_school_report_reader/activities_data_reader.rb +58 -0
- data/lib/udise_school_report_reader/anganwadi_data_reader.rb +22 -0
- data/lib/udise_school_report_reader/basic_info_data_reader.rb +29 -0
- data/lib/udise_school_report_reader/block_rectangle_combiner.rb +115 -0
- data/lib/udise_school_report_reader/building_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/characteristics_reader.rb +28 -0
- data/lib/udise_school_report_reader/csv_writer.rb +75 -0
- data/lib/udise_school_report_reader/data_reader_base.rb +86 -0
- data/lib/udise_school_report_reader/digital_facilities_data_reader.rb +42 -0
- data/lib/udise_school_report_reader/enrollment_data_reader.rb +136 -0
- data/lib/udise_school_report_reader/enrollment_html_writer.rb +81 -0
- data/lib/udise_school_report_reader/enrollment_yaml_writer.rb +62 -0
- data/lib/udise_school_report_reader/ews_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/ews_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/ews_yaml_writer.rb +31 -0
- data/lib/udise_school_report_reader/location_data_reader.rb +47 -0
- data/lib/udise_school_report_reader/official_data_reader.rb +40 -0
- data/lib/udise_school_report_reader/pdf_block_extractor.rb +49 -0
- data/lib/udise_school_report_reader/pdf_content_compressor.rb +36 -0
- data/lib/udise_school_report_reader/pdf_rectangle_extractor.rb +53 -0
- data/lib/udise_school_report_reader/rooms_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/rte_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/rte_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/rte_yaml_writer.rb +61 -0
- data/lib/udise_school_report_reader/sanitation_data_reader.rb +56 -0
- data/lib/udise_school_report_reader/school_report_parser.rb +295 -0
- data/lib/udise_school_report_reader/teacher_data_reader.rb +204 -0
- data/lib/udise_school_report_reader/version.rb +3 -0
- data/lib/udise_school_report_reader.rb +41 -0
- data/test/school_report_parser_test.rb +62 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: eb4e7f18e92a981045b328698bf04015f68aee7f22c9bc1a19b8ceab7c416eca
|
4
|
+
data.tar.gz: c0e734f6c22a5ad1ce6af4b2ee094ca140540500ebe75ba1594c6f1d6eb528bd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c453df46b297daef2ed774d838289e9ea41588d48f07db24964ca6fcd6cd918c8a99f789c7a89cb14bc96b8769091a1e2cb36183b90615a30cb814d7f16d586a
|
7
|
+
data.tar.gz: b9b2b0690d6f0d1a65f0f36e2d8a0a707860653332066f8591b5cdce573a5d5429f3971e57b7d889665398640282abfc6b543b335394a0d0d8e3faff7c740a19
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## [0.1.0] - 2024-01-01
|
4
|
+
|
5
|
+
### Added
|
6
|
+
- Initial release
|
7
|
+
- Basic PDF parsing functionality
|
8
|
+
- Data readers for various sections:
|
9
|
+
- Basic school information
|
10
|
+
- Teacher details
|
11
|
+
- Room information
|
12
|
+
- Location data
|
13
|
+
- Building information
|
14
|
+
- Sanitation facilities
|
15
|
+
- Digital facilities
|
16
|
+
- RTE compliance
|
17
|
+
- EWS details
|
18
|
+
- Enrollment data
|
19
|
+
- HTML and YAML writers for data export
|
20
|
+
- CSV export functionality
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 UDISE Plus
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# UDISE School Report Reader
|
2
|
+
|
3
|
+
A Ruby gem for parsing and extracting data from UDISE (Unified District Information System for Education) school reports.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'udise_school_report_reader'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle install
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install udise_school_report_reader
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'udise_school_report_reader'
|
25
|
+
|
26
|
+
# Initialize the parser with a PDF file
|
27
|
+
parser = UdiseSchoolReportReader::SchoolReportParser.new('path/to/report.pdf')
```
|
28
|
+
|
29
|
+
## Features
|
30
|
+
|
31
|
+
- Parse UDISE school reports in PDF format
|
32
|
+
|
33
|
+
## Development
|
34
|
+
|
35
|
+
After checking out the repo, run `bundle install` to install dependencies. Then, run `rake test` to run the tests.
|
36
|
+
|
37
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
38
|
+
|
39
|
+
## Contributing
|
40
|
+
|
41
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/UDISE-Plus/udise-school-report-reader.
|
42
|
+
|
43
|
+
## License
|
44
|
+
|
45
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Extracts the "activities" section of a school report (inspection counts,
# instructional days and average school hours) from its text lines.
class ActivitiesDataReader
  # Labels whose value is a single integer on the following line, mapped to
  # the key stored under data['activities']['inspections'].
  INSPECTION_LABELS = {
    'Acad. Inspections' => 'academic',
    'CRC Coordinator' => 'crc_coordinator',
    'Block Level Officers' => 'block_level',
    'State/District Officers' => 'state_district'
  }.freeze

  # Keys for the four consecutive values that follow a per-level label.
  LEVELS = %w[primary upper_primary secondary higher_secondary].freeze

  # Whole-string anchors: a value must consist of nothing but the number.
  INTEGER_PATTERN = /\A\d+\z/
  DECIMAL_PATTERN = /\A\d+\.?\d*\z/

  # Builds the activities hash from the report lines.
  #
  # @param lines [Array<String>] text lines of the report, in reading order
  # @return [Hash] { 'activities' => { section => values } } containing only
  #   the sections for which at least one value was found, or {} when none was
  def self.read(lines)
    inspections = {}
    instructional_days = {}
    student_hours = {}
    teacher_hours = {}

    lines.each_with_index do |line, i|
      if (key = INSPECTION_LABELS[line])
        value = lines[i + 1]&.strip
        inspections[key] = value.to_i if value&.match?(INTEGER_PATTERN)
        next
      end

      case line
      when 'Instructional days'
        instructional_days.merge!(read_levels(lines, i + 1, INTEGER_PATTERN, &:to_i))
      when 'Avg.School hrs.Std.'
        student_hours.merge!(read_levels(lines, i + 1, DECIMAL_PATTERN, &:to_f))
      when 'Avg.School hrs.Tch.'
        teacher_hours.merge!(read_levels(lines, i + 1, DECIMAL_PATTERN, &:to_f))
      end
    end

    sections = {
      'inspections' => inspections,
      'instructional_days' => instructional_days,
      'student_hours' => student_hours,
      'teacher_hours' => teacher_hours
    }.reject { |_, values| values.empty? }

    sections.empty? ? {} : { 'activities' => sections }
  end

  # Reads up to four per-level values starting at index +start+.
  #
  # Values are stripped before validation so that padded values are treated
  # the same way as the inspection counts. Nothing is read unless the first
  # (primary) value is valid; after that each level is validated on its own.
  #
  # @param lines [Array<String>] all report lines
  # @param start [Integer] index of the primary-level value
  # @param pattern [Regexp] pattern a value must match to be accepted
  # @yieldparam value [String] an accepted, stripped value
  # @yieldreturn [Numeric] the converted value to store
  # @return [Hash{String => Numeric}] accepted values keyed by level
  def self.read_levels(lines, start, pattern)
    values = Array.new(LEVELS.size) { |offset| lines[start + offset]&.strip }
    return {} unless values.first&.match?(pattern)

    LEVELS.zip(values).each_with_object({}) do |(level, value), result|
      result[level] = yield(value) if value&.match?(pattern)
    end
  end
  private_class_method :read_levels
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Reader for the Anganwadi fields of a school report. All parsing behaviour
# comes from DataReaderBase; this class only declares which report labels map
# to which keys.
class AnganwadiDataReader
  include DataReaderBase

  # Report label => mapping options.
  #   key_path:   nested keys under which the value found on the line after
  #               the label is stored
  #   value_type: :integer stores the value as an Integer (digits only)
  FIELD_MAPPINGS = {
    'Anganwadi At Premises' => { key_path: %w[anganwadi at_premises] },
    'Anganwadi Boys' => { key_path: %w[anganwadi boys], value_type: :integer },
    'Anganwadi Girls' => { key_path: %w[anganwadi girls], value_type: :integer },
    'Anganwadi Worker' => { key_path: %w[anganwadi worker] }
  }
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Extracts the basic identification fields (UDISE code, school name and
# academic year) from the text lines of a school report.
class BasicInfoDataReader
  # Eleven digits in groups of 2-2-2-2-3, optionally separated by whitespace.
  UDISE_CODE_PATTERN = /(\d{2})\s*(\d{2})\s*(\d{2})\s*(\d{2})\s*(\d{3})/
  ACADEMIC_YEAR_PATTERN = /Academic Year.*:\s*(\d{4}-\d{2})/

  # Builds the basic-info hash, seeded from the 'basic_info' section of
  # template.yml (resolved against the current working directory).
  #
  # @param lines [Array<String>] text lines of the report, in reading order
  # @return [Hash] { 'basic_info' => { ... } } without nil or empty-hash values
  # @raise [Errno::ENOENT] when template.yml cannot be found
  def self.read(lines)
    require 'yaml'
    template = YAML.load_file('template.yml')
    # A missing section (or an empty template file) must not crash the
    # assignments below, so fall back to an empty hash.
    section = template.is_a?(Hash) ? template['basic_info'] : nil
    data = { 'basic_info' => section.is_a?(Hash) ? section : {} }

    lines.each_with_index do |line, i|
      next_line = lines[i + 1]&.strip

      case line
      when 'UDISE CODE'
        match = next_line&.match(UDISE_CODE_PATTERN)
        data['basic_info']['udise_code'] = match[1..5].join if match
      when 'School Name'
        # Accept any non-blank name; the value is not restricted to one
        # particular school.
        data['basic_info']['name'] = next_line if next_line && !next_line.empty?
      when ACADEMIC_YEAR_PATTERN
        data['basic_info']['academic_year'] = Regexp.last_match(1)
      end
    end

    # Drop template placeholders that were never filled in.
    data['basic_info'].reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) }

    data
  end
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Pairs text blocks with the rectangles that enclose them.
class BlockRectangleCombiner
  # Produces one entry per positioned text block (joined with the smallest
  # rectangle on the same page that contains the block's x/y point, if any)
  # plus one entry per valid rectangle that contains no text block at all.
  #
  # Blocks without coordinates and rectangles without positive dimensions are
  # ignored; a warning is printed for rectangles and for non-empty blocks that
  # are discarded this way.
  #
  # @param blocks [Array<Hash>] hashes with :page, :x, :y, :text, :font, :font_size
  # @param rects [Array<Hash>] hashes with :page, :x, :y, :width, :height,
  #   :stroke_color, :fill_color, :line_width
  # @return [Array<Hash>] combined entries sorted by page, then descending y,
  #   then ascending x (rectangle coordinates take precedence over text ones)
  def self.combine(blocks, rects)
    positioned = ->(block) { block[:x] && block[:y] }
    well_formed = lambda do |rect|
      rect[:x] && rect[:y] && rect[:width] && rect[:height] && rect[:width] > 0 && rect[:height] > 0
    end
    encloses = lambda do |rect, block|
      rect[:page] == block[:page] &&
        block[:x] >= rect[:x] &&
        block[:x] <= (rect[:x] + rect[:width]) &&
        block[:y] >= rect[:y] &&
        block[:y] <= (rect[:y] + rect[:height])
    end
    # Rectangle half of an entry; every field is nil when there is no rectangle.
    rect_fields = lambda do |rect|
      source = rect || {}
      {
        rect_x: source[:x],
        rect_y: source[:y],
        rect_width: source[:width],
        rect_height: source[:height],
        stroke_color: source[:stroke_color],
        fill_color: source[:fill_color],
        line_width: source[:line_width]
      }
    end

    # Only blocks that carry text are worth warning about.
    unplaced = blocks.select { |block| !positioned.call(block) && !block[:text].to_s.empty? }
    unless unplaced.empty?
      warn "Warning: Found #{unplaced.size} non-empty blocks with missing coordinates"
      unplaced.each { |block| warn "  - Page #{block[:page]}: '#{block[:text]}'" }
    end
    valid_blocks = blocks.select(&positioned)

    valid_rects, broken_rects = rects.partition(&well_formed)
    unless broken_rects.empty?
      warn "Warning: Found #{broken_rects.size} rectangles with invalid coordinates"
      broken_rects.each do |rect|
        warn "  - Page #{rect[:page]}: x=#{rect[:x]}, y=#{rect[:y]}, w=#{rect[:width]}, h=#{rect[:height]}"
      end
    end

    # Text entries: each block with its tightest enclosing rectangle (by area).
    text_entries = valid_blocks.map do |block|
      tightest = valid_rects
                 .select { |rect| encloses.call(rect, block) }
                 .min_by { |rect| rect[:width] * rect[:height] }

      {
        page: block[:page],
        text: block[:text],
        text_x: block[:x],
        text_y: block[:y],
        font: block[:font],
        font_size: block[:font_size]
      }.merge(rect_fields.call(tightest))
    end

    # Rectangles that enclose no text block become text-less entries.
    vacant_entries = valid_rects
                     .select { |rect| valid_blocks.none? { |block| encloses.call(rect, block) } }
                     .map do |rect|
      {
        page: rect[:page],
        text: "",
        text_x: nil,
        text_y: nil,
        font: nil,
        font_size: nil
      }.merge(rect_fields.call(rect))
    end

    entries = text_entries + vacant_entries
    entries.sort_by! do |entry|
      [
        entry[:page],
        -(entry[:rect_y] || entry[:text_y] || 0), # negated so larger y sorts first
        entry[:rect_x] || entry[:text_x] || 0
      ]
    end

    entries
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Reader for the building fields of a school report. All parsing behaviour
# comes from DataReaderBase; this class only declares which report labels map
# to which keys.
class BuildingDataReader
  include DataReaderBase

  # Report label => mapping options.
  #   key_path:    nested keys under which the value found on the line after
  #                the label is stored
  #   end_pattern: when the line after the label matches this pattern, the
  #                field is treated as having no value and is skipped
  #   value_type:  :integer stores the value as an Integer (digits only)
  FIELD_MAPPINGS = {
    'Building Status' => { key_path: %w[building status], end_pattern: /Boundary/ },
    'Boundary wall' => { key_path: %w[building boundary_wall], end_pattern: /No\.of/ },
    'No.of Building Blocks' => { key_path: %w[building blocks], value_type: :integer },
    'Pucca Building Blocks' => { key_path: %w[building pucca_blocks], value_type: :integer },
    'Is this a Shift School?' => { key_path: %w[building has_shifts], end_pattern: /Building/ },
    'Availability of Ramps' => {
      key_path: %w[building accessibility ramps],
      end_pattern: /Availability of Hand/
    },
    'Availability of Handrails' => {
      key_path: %w[building accessibility handrails],
      end_pattern: /Anganwadi/
    }
  }
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Reader for the school-characteristics fields of a school report. All parsing
# behaviour comes from DataReaderBase; this class only declares which report
# labels map to which keys.
class CharacteristicsReader
  include DataReaderBase

  # Report label => mapping options.
  #   key_path:    nested keys under which the value found on the line after
  #                the label is stored
  #   end_pattern: when the line after the label matches this pattern, the
  #                field is treated as having no value and is skipped
  FIELD_MAPPINGS = {
    'School Category' => { key_path: %w[characteristics category], end_pattern: /Management/ },
    'School Type' => { key_path: %w[characteristics type], end_pattern: /Lowest/ },
    'Lowest & Highest Class' => { key_path: %w[characteristics class_range], end_pattern: /Pre Primary/ },
    'Pre Primary' => { key_path: %w[characteristics pre_primary], end_pattern: /Medium/ },
    'Is Special School for CWSN?' => {
      key_path: %w[characteristics is_special_school],
      end_pattern: /Availability/
    }
  }
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Writes extracted PDF data (text blocks, rectangles, or both combined) to CSV
# files. Each file starts with a header row naming the columns.
class CSVWriter
  # Column lists: each symbol is both the hash key read from a row and, as a
  # string, the header written for that column.
  BLOCK_COLUMNS = %i[page x y text font font_size].freeze
  RECTANGLE_COLUMNS = %i[page x y width height stroke_color fill_color line_width].freeze
  COMBINED_COLUMNS = %i[
    page text text_x text_y font font_size
    rect_x rect_y rect_width rect_height stroke_color fill_color line_width
  ].freeze
  private_constant :BLOCK_COLUMNS, :RECTANGLE_COLUMNS, :COMBINED_COLUMNS

  # Writes text blocks to +csv_path+, overwriting any existing file.
  #
  # @param blocks [Array<Hash>] hashes keyed by :page, :x, :y, :text, :font, :font_size
  # @param csv_path [String] destination file
  def self.write_blocks(blocks, csv_path)
    write_rows(csv_path, BLOCK_COLUMNS, blocks)
  end

  # Writes rectangles to +csv_path+, overwriting any existing file.
  #
  # @param rectangles [Array<Hash>] hashes keyed by :page, :x, :y, :width,
  #   :height, :stroke_color, :fill_color, :line_width
  # @param csv_path [String] destination file
  def self.write_rectangles(rectangles, csv_path)
    write_rows(csv_path, RECTANGLE_COLUMNS, rectangles)
  end

  # Writes combined text/rectangle entries to +output_path+, overwriting any
  # existing file.
  #
  # @param combined_data [Array<Hash>] hashes keyed by the text and rect_* fields
  # @param output_path [String] destination file
  def self.write_combined(combined_data, output_path)
    write_rows(output_path, COMBINED_COLUMNS, combined_data)
  end

  # Shared writer: header row first, then one row per record with the values
  # taken in column order.
  def self.write_rows(path, columns, records)
    CSV.open(path, 'wb') do |csv|
      csv << columns.map(&:to_s)
      records.each do |record|
        csv << columns.map { |column| record[column] }
      end
    end
  end
  private_class_method :write_rows
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# Mixin that gives a reader class a declarative, mapping-driven `read` method.
#
# An including class must define a FIELD_MAPPINGS constant: a hash from the
# exact text of a report label to its options:
#   key_path:     Array of nested keys under which the value is stored
#   value_type:   :integer to store the value as an Integer (optional)
#   end_pattern:  Regexp; when the line after the label matches it, the field
#                 is considered empty and skipped (optional)
#   is_table:     truthy to treat the following lines as a table (optional)
#   table_config: { sections: [{ trigger:, offset:, fields: [{ key:, value_type: }] }] }
module DataReaderBase
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Class-level API added to every including class.
  module ClassMethods
    # Parses the report lines according to FIELD_MAPPINGS.
    #
    # The result is seeded from the section of template.yml (resolved against
    # the current working directory) named after the reader, see #base_key.
    #
    # @param lines [Array<String>] text lines of the report, in reading order
    # @return [Hash] { base_key => section } with nil and empty-hash values removed
    # @raise [Errno::ENOENT] when template.yml cannot be found
    def read(lines)
      require 'yaml'
      # An empty template file loads as a falsy value; treat it as "no defaults".
      template = YAML.load_file('template.yml') || {}
      data = { base_key => template[base_key] }

      lines.each_with_index do |line, i|
        mapping = self::FIELD_MAPPINGS[line]
        next unless mapping

        if mapping[:is_table]
          # A table is searched for in the 20 lines following its label.
          process_table_data(data, lines[i + 1..i + 20], mapping)
        else
          process_simple_field(data, lines[i + 1]&.strip, mapping)
        end
      end

      cleanup_data(data)
      data
    end

    private

    # For each configured section, finds the first line matching its trigger
    # and stores the all-digit values located +offset+ lines after it, one per
    # configured field. A section that would run past the end of the table
    # window is skipped entirely.
    def process_table_data(data, table_lines, mapping)
      mapping[:table_config][:sections].each do |section|
        idx = table_lines.index { |l| l.match?(section[:trigger]) }
        next unless idx && idx + section[:offset] + section[:fields].length <= table_lines.length

        section[:fields].each_with_index do |field, field_idx|
          value = table_lines[idx + section[:offset] + field_idx]
          next unless value =~ /^\d+$/

          set_value_at_path(data, mapping[:key_path] + field[:key], transform_value(value, field[:value_type]))
        end
      end
    end

    # Stores the line following a label, unless there is no such line or it
    # matches the mapping's end_pattern.
    def process_simple_field(data, next_line, mapping)
      return unless next_line
      return if mapping[:end_pattern] && next_line.match?(mapping[:end_pattern])

      set_value_at_path(data, mapping[:key_path], transform_value(next_line, mapping[:value_type]))
    end

    # Converts a raw value. :integer yields an Integer for all-digit input and
    # nil otherwise; any other type returns the value unchanged.
    def transform_value(value, type)
      case type
      when :integer
        value.to_i if value =~ /^\d+$/
      else
        value
      end
    end

    # Assigns +value+ at the nested +path+, creating intermediate hashes
    # wherever a level is missing (or nil in the template).
    def set_value_at_path(data, path, value)
      current = data
      path[0..-2].each do |key|
        current[key] ||= {}
        current = current[key]
      end
      current[path.last] = value
    end

    # Removes nil and empty-hash values from the base section and from the
    # hashes nested directly inside it.
    def cleanup_data(data)
      base = data[base_key]
      return unless base.is_a?(Hash)

      base.each_value do |section|
        section.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) } if section.is_a?(Hash)
      end
      base.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) }
    end

    # Name of the top-level key this reader fills, derived from the class
    # name: the namespace and a trailing "DataReader" or "Reader" are dropped
    # and the remainder is snake_cased (AnganwadiDataReader => "anganwadi",
    # CharacteristicsReader => "characteristics").
    def base_key
      name.split('::').last
          .sub(/(?:Data)?Reader\z/, '')
          .gsub(/([a-z\d])([A-Z])/, '\1_\2')
          .downcase
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Extracts the digital-facilities section (ICT lab, internet, device counts,
# smart-classroom equipment) from the text lines of a school report.
class DigitalFacilitiesDataReader
  # Coded answers as they appear in the report: "1-..." or "2-...".
  FLAG_PATTERN = /^[12]-/
  COUNT_PATTERN = /^\d+$/

  # Fixed layout following a line that contains "Digital":
  # [line offset from that line, key path under 'digital_facilities', kind].
  # :flag values are stored as the raw line, :count values as Integers.
  FIELDS = [
    [2, %w[ict_lab], :flag],
    [4, %w[internet], :flag],
    [6, %w[computers desktop], :count],
    [8, %w[computers laptop], :count],
    [10, %w[computers tablet], :count],
    [12, %w[peripherals printer], :count],
    [14, %w[peripherals projector], :count],
    [16, %w[smart_classroom dth], :flag],
    [18, %w[smart_classroom digiboard], :count]
  ].freeze

  # Builds the digital-facilities hash, seeded from the 'digital_facilities'
  # section of template.yml (resolved against the current working directory).
  #
  # @param lines [Array<String>] text lines of the report, in reading order
  # @return [Hash, nil] { 'digital_facilities' => { ... } } without nil or
  #   empty-hash values, or nil when nothing was found
  # @raise [Errno::ENOENT] when template.yml cannot be found
  def self.read(lines)
    # Loaded here so the reader does not depend on another file having
    # required YAML first.
    require 'yaml'
    template = YAML.load_file('template.yml')
    section = template.is_a?(Hash) ? template['digital_facilities'] : nil
    section = {} unless section.is_a?(Hash)
    data = { 'digital_facilities' => section }

    lines.each_with_index do |line, i|
      next unless line =~ /Digital/

      FIELDS.each do |offset, path, kind|
        raw = lines[i + offset]
        case kind
        when :flag
          store(section, path, raw) if raw =~ FLAG_PATTERN
        when :count
          store(section, path, raw.to_i) if raw =~ COUNT_PATTERN
        end
      end
    end

    # Drop placeholders that were never filled in, one level deep and then at
    # the top level.
    section.each_value do |value|
      value.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) } if value.is_a?(Hash)
    end
    section.reject! { |_, v| v.nil? || (v.is_a?(Hash) && v.empty?) }

    data unless section.empty?
  end

  # Assigns +value+ at +path+ inside +section+, creating intermediate hashes
  # when the template does not provide them.
  def self.store(section, path, value)
    target = path[0..-2].inject(section) do |node, key|
      node[key] = {} unless node[key].is_a?(Hash)
      node[key]
    end
    target[path.last] = value
  end
  private_class_method :store
end
|