udise_school_report_reader 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +20 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/lib/udise_school_report_reader/activities_data_reader.rb +58 -0
- data/lib/udise_school_report_reader/anganwadi_data_reader.rb +22 -0
- data/lib/udise_school_report_reader/basic_info_data_reader.rb +29 -0
- data/lib/udise_school_report_reader/block_rectangle_combiner.rb +115 -0
- data/lib/udise_school_report_reader/building_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/characteristics_reader.rb +28 -0
- data/lib/udise_school_report_reader/csv_writer.rb +75 -0
- data/lib/udise_school_report_reader/data_reader_base.rb +86 -0
- data/lib/udise_school_report_reader/digital_facilities_data_reader.rb +42 -0
- data/lib/udise_school_report_reader/enrollment_data_reader.rb +136 -0
- data/lib/udise_school_report_reader/enrollment_html_writer.rb +81 -0
- data/lib/udise_school_report_reader/enrollment_yaml_writer.rb +62 -0
- data/lib/udise_school_report_reader/ews_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/ews_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/ews_yaml_writer.rb +31 -0
- data/lib/udise_school_report_reader/location_data_reader.rb +47 -0
- data/lib/udise_school_report_reader/official_data_reader.rb +40 -0
- data/lib/udise_school_report_reader/pdf_block_extractor.rb +49 -0
- data/lib/udise_school_report_reader/pdf_content_compressor.rb +36 -0
- data/lib/udise_school_report_reader/pdf_rectangle_extractor.rb +53 -0
- data/lib/udise_school_report_reader/rooms_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/rte_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/rte_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/rte_yaml_writer.rb +61 -0
- data/lib/udise_school_report_reader/sanitation_data_reader.rb +56 -0
- data/lib/udise_school_report_reader/school_report_parser.rb +295 -0
- data/lib/udise_school_report_reader/teacher_data_reader.rb +204 -0
- data/lib/udise_school_report_reader/version.rb +3 -0
- data/lib/udise_school_report_reader.rb +41 -0
- data/test/school_report_parser_test.rb +62 -0
- metadata +165 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# Reads the enrolment table found on page 2 of the cell CSV (columns used:
# page, text, rect_x, rect_y, text_x, text_y) and groups the numeric cells by
# category row and by boy/girl column pair.
class EnrollmentDataReader
  SOCIAL_CATEGORIES = [
    { key: 'gen', label: 'Gen' },
    { key: 'sc', label: 'SC' },
    { key: 'st', label: 'ST' },
    { key: 'obc', label: 'OBC' }
  ].freeze

  RELIGION_CATEGORIES = [
    { key: 'musl', label: 'Musl' },
    { key: 'chris', label: 'Chris' },
    { key: 'sikh', label: 'Sikh' },
    { key: 'budd', label: 'Budd' },
    { key: 'parsi', label: 'Parsi' },
    { key: 'jain', label: 'Jain' },
    { key: 'others', label: 'Others' }
  ].freeze

  OTHER_CATEGORIES = [
    { key: 'aadh', label: 'Aadh' },
    { key: 'bpl', label: 'BPL' },
    { key: 'rept', label: 'Rept' },
    { key: 'cwsn', label: 'CWSN' }
  ].freeze

  AGE_CATEGORIES = (3..22).map do |age|
    { key: "age_#{age}", label: age == 3 ? '>3' : age.to_s }
  end.freeze

  # Flat list of every category row looked up in the table, in output order.
  ALL_CATEGORIES = (
    SOCIAL_CATEGORIES + RELIGION_CATEGORIES + OTHER_CATEGORIES + AGE_CATEGORIES
  ).freeze

  # Header texts that identify a grade column.
  GRADE_LABELS = [
    'Pre-Pr', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X', 'XI', 'XII'
  ].freeze

  # Header texts that identify the boys / girls sub-columns.
  GENDER_LABELS = %w[B G].freeze

  # Convenience wrapper: build a reader for +csv_path+ and run it.
  def self.read(csv_path) = new(csv_path).read

  # @param csv_path [String] path of the CSV holding the positioned text cells
  def initialize(csv_path)
    @csv_path = csv_path
    @x_cutoff = 0
    @category_y_coords = {}
  end

  # Parses the CSV and returns the enrolment table.
  #
  # @return [Hash, nil] nil when no grade header cell is found; otherwise a
  #   hash with :grade_rows (grade header cells sorted by text_x), :bg_pairs
  #   (midpoint x => [B cell, G cell]) and one :"<key>_numbers" entry per
  #   category (midpoint x => [boys cell or nil, girls cell or nil]).
  def read
    # Only page-2 rows are ever inspected, so load them once instead of
    # streaming the file twice.
    rows = CSV.foreach(@csv_path, headers: true).select { |row| row['page'] == '2' }
    record_category_positions(rows)

    grade_rows = []
    bg_rows = []
    category_rows = ALL_CATEGORIES.to_h { |category| [category[:key], []] }

    rows.each do |row|
      text = row['text']
      # The "Total" header marks where the per-grade columns stop.
      @x_cutoff = row['rect_x'].to_f if text == 'Total' && row['rect_y'].to_f == 778.0

      if GRADE_LABELS.include?(text)
        grade_rows << row if row['text_y'].to_f == 780.0
      elsif GENDER_LABELS.include?(text)
        bg_rows << row if row['text_y'].to_f == 768.0
      elsif text.to_s.match?(/^\d+$/)
        file_under_categories(row, category_rows)
      end
    end

    return nil if grade_rows.empty?

    # Order every list left-to-right and drop anything at or beyond the cutoff.
    # Note: when no "Total" header was seen the cutoff is still 0, so every
    # row is dropped here.
    [grade_rows, bg_rows, *category_rows.values].each do |list|
      list.sort_by! { |row| row['text_x'].to_f }
      list.reject! { |row| row['text_x'].to_f >= @x_cutoff }
    end

    bg_pairs = pair_up(bg_rows)

    result = { grade_rows: grade_rows, bg_pairs: bg_pairs }
    ALL_CATEGORIES.each do |category|
      key = category[:key]
      result[:"#{key}_numbers"] = match_numbers_to_pairs(category_rows[key], bg_pairs)
    end
    result
  end

  private

  # Remembers the rect_y of each category label found in the label column
  # (cells whose rect_x lies within 5.0 of 27.0). Cells without text are skipped.
  def record_category_positions(rows)
    rows.each do |row|
      next unless (row['rect_x'].to_f - 27.0).abs < 5.0

      label = row['text']&.downcase
      next unless label

      ALL_CATEGORIES.each do |category|
        @category_y_coords[category[:key]] = row['rect_y'].to_f if label == category[:label].downcase
      end
    end
  end

  # Adds a numeric cell to every category whose label row lies within 5.0 of
  # the cell's rect_y.
  def file_under_categories(row, category_rows)
    y_coord = row['rect_y'].to_f
    ALL_CATEGORIES.each do |category|
      label_y = @category_y_coords[category[:key]]
      category_rows[category[:key]] << row if label_y && (y_coord - label_y).abs < 5.0
    end
  end

  # Groups consecutive B/G header cells into { midpoint_x => [b, g] }.
  # A trailing unpaired cell is ignored instead of raising on nil.
  def pair_up(bg_rows)
    bg_rows.each_slice(2).filter_map do |b, g|
      next unless b && g

      [(b['text_x'].to_f + g['text_x'].to_f) / 2, [b, g]]
    end.to_h
  end

  # For every B/G pair picks the first remaining number whose text_x is within
  # +threshold+ of the B header, then of the G header. A number is used once.
  def match_numbers_to_pairs(remaining_numbers, bg_pairs, threshold = 10.0)
    numbers = {}
    remaining = remaining_numbers.dup

    bg_pairs.each do |x_mid, bg_pair|
      b_x = bg_pair[0]['text_x'].to_f
      g_x = bg_pair[1]['text_x'].to_f

      b_num = remaining.find { |row| (row['text_x'].to_f - b_x).abs < threshold }
      remaining.delete(b_num) if b_num

      g_num = remaining.find { |row| (row['text_x'].to_f - g_x).abs < threshold }
      remaining.delete(g_num) if g_num

      numbers[x_mid] = [b_num, g_num]
    end

    numbers
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'cgi'

# Renders the enrolment table (one row per category / age, two columns per
# grade) as a standalone HTML file.
class EnrollmentHtmlWriter
  # Writes the HTML table for +data+ to +html_path+.
  #
  # @param data [Hash, nil] hash with :grade_rows, :bg_pairs and the
  #   :"<key>_numbers" entries; nil makes this a no-op
  # @param html_path [String] destination file
  # @return [Integer, nil] result of File.write, or nil when +data+ is nil
  def self.generate_html(data, html_path)
    return unless data

    grade_rows = data[:grade_rows]
    bg_pairs = data[:bg_pairs]

    categories = [
      ['Gen', data[:gen_numbers]],
      ['SC', data[:sc_numbers]],
      ['ST', data[:st_numbers]],
      ['OBC', data[:obc_numbers]],
      ['Muslim', data[:musl_numbers]],
      ['Christian', data[:chris_numbers]],
      ['Sikh', data[:sikh_numbers]],
      ['Buddhist', data[:budd_numbers]],
      ['Parsi', data[:parsi_numbers]],
      ['Jain', data[:jain_numbers]],
      ['Others', data[:others_numbers]],
      ['Aadhaar', data[:aadh_numbers]],
      ['BPL', data[:bpl_numbers]],
      ['Repeater', data[:rept_numbers]],
      ['CWSN', data[:cwsn_numbers]]
    ]

    ages = (3..22).map { |age| ["Age #{age}", data[:"age_#{age}_numbers"]] }

    # One <tr> per category/age; one boys and one girls <td> per B/G pair.
    table_rows = (categories + ages).map do |category, numbers|
      cells = bg_pairs.map do |x_mid, _|
        # A category missing from +data+ yields empty cells instead of raising.
        nums = numbers&.[](x_mid)
        "<td>#{cell_text(nums&.first)}</td><td>#{cell_text(nums&.last)}</td>"
      end.join

      " <tr>\n" \
      " <td class=\"category\">#{category}</td>\n" \
      " #{cells}\n" \
      " </tr>"
    end.join("\n")

    grade_headers = grade_rows.map { |row| "<th colspan='2'>#{CGI.escapeHTML(row['text'].to_s)}</th>" }.join
    bg_headers = grade_rows.map { |_| "<td>B</td><td>G</td>" }.join

    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
        <title>Enrollment Table</title>
        <style>
          table { border-collapse: collapse; margin-top: 20px; width: 100%; }
          th, td { border: 1px solid black; padding: 8px; text-align: center; }
          .header { font-weight: bold; background-color: #f0f0f0; }
          .grade { font-weight: bold; background-color: #e0e0e0; }
          .bg-pair { background-color: #f8f8f8; }
          .category { font-weight: bold; text-align: left; }
        </style>
      </head>
      <body>
        <h2>Enrolment (By Social Category)</h2>
        <table>
          <tr class="grade">
            <th rowspan="2">Category</th>
            #{grade_headers}
          </tr>
          <tr class="bg-pair">
            #{bg_headers}
          </tr>
          #{table_rows}
        </table>
      </body>
      </html>
    HTML

    File.write(html_path, html_content)
  end

  # HTML-escaped text of a cell, or an empty string when the cell is absent.
  def self.cell_text(cell)
    cell ? CGI.escapeHTML(cell['text'].to_s) : ''
  end
  private_class_method :cell_text
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Converts the enrolment table into a nested, YAML-friendly hash:
# { category_or_age => { grade_name => { 'boys' => Integer|nil, 'girls' => Integer|nil } } }.
class EnrollmentYamlWriter
  # Output name => key under which the numbers are stored in the input hash.
  CATEGORY_KEYS = {
    'gen' => :gen_numbers,
    'sc' => :sc_numbers,
    'st' => :st_numbers,
    'obc' => :obc_numbers,
    'muslim' => :musl_numbers,
    'christian' => :chris_numbers,
    'sikh' => :sikh_numbers,
    'buddhist' => :budd_numbers,
    'parsi' => :parsi_numbers,
    'jain' => :jain_numbers,
    'others' => :others_numbers,
    'aadhaar' => :aadh_numbers,
    'bpl' => :bpl_numbers,
    'repeater' => :rept_numbers,
    'cwsn' => :cwsn_numbers
  }.freeze

  # @param data [Hash, nil] hash with :grade_rows, :bg_pairs and the
  #   :"<key>_numbers" entries
  # @return [Hash, nil] nested hash described on the class, nil when +data+ is nil
  def self.format_yaml(data)
    return unless data

    grade_rows = data[:grade_rows]
    bg_pairs = data[:bg_pairs]

    yaml_data = {}

    CATEGORY_KEYS.each do |name, numbers_key|
      yaml_data[name] = counts_by_grade(data[numbers_key], grade_rows, bg_pairs)
    end

    (3..22).each do |age|
      yaml_data["age_#{age}"] = counts_by_grade(data[:"age_#{age}_numbers"], grade_rows, bg_pairs)
    end

    yaml_data
  end

  # Builds { grade_name => { 'boys' => n, 'girls' => n } } for one category.
  # The i-th B/G pair is labelled with the i-th grade header; pairs without a
  # grade header (or whose header has no text) are skipped.
  def self.counts_by_grade(numbers, grade_rows, bg_pairs)
    counts = {}
    bg_pairs.each_with_index do |(x_mid, _), index|
      grade_text = grade_rows[index] && grade_rows[index]['text']
      next unless grade_text

      nums = numbers&.[](x_mid)
      counts[grade_text.downcase.gsub(' ', '_')] = {
        'boys' => to_count(nums&.first),
        'girls' => to_count(nums&.last)
      }
    end
    counts
  end
  private_class_method :counts_by_grade

  # Integer value of a cell's text; nil when the cell or its text is missing or blank.
  def self.to_count(cell)
    text = cell&.[]('text')&.strip
    text.nil? || text.empty? ? nil : text.to_i
  end
  private_class_method :to_count
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# Reads the EWS enrolment table from page 1 of the cell CSV (columns used:
# page, text, rect_y, text_x). The table is located via its title cell; the
# three rows directly below the title are taken as grade headers, B/G headers
# and values.
class EwsDataReader
  GRADES = [
    'Pre-Pri.', 'Class I', 'Class II', 'Class III', 'Class IV', 'Class V',
    'Class VI', 'Class VII', 'Class VIII', 'Class IX', 'Class X', 'Class XI', 'Class XII'
  ].freeze

  # Convenience wrapper: build a reader for +csv_path+ and run it.
  def self.read(csv_path) = new(csv_path).read

  # Loads the CSV and locates the grade, B/G and value rows. When the title or
  # the three rows below it cannot be found the row ivars stay nil and #read
  # returns nil.
  #
  # @param csv_path [String] path of the CSV holding the positioned text cells
  def initialize(csv_path)
    @csv_path = csv_path
    @rows = Hash.new { |h, k| h[k] = [] }

    # Group page-1 cells into table rows by their rect_y.
    CSV.foreach(@csv_path, headers: true) do |cell|
      next unless cell['page'] == '1'

      @rows[cell['rect_y'].to_f] << cell
    end

    @title_row = @rows.find do |_, cells|
      cells.any? do |cell|
        cell['text']&.include?('Total no. of Economically Weaker Section*(EWS) students Enrolled in Schools')
      end
    end

    title_y = @title_row&.first
    return unless title_y

    # Rows with a smaller y than the title, nearest to the title first.
    rows_after_title = @rows.select { |y, _| y < title_y.to_f }
                            .sort_by(&:first)
                            .reverse
    return unless rows_after_title.size >= 3

    @grades_row = rows_after_title[0].last
    @bg_row = rows_after_title[1].last
    @values_row = rows_after_title[2].last

    [@grades_row, @bg_row].each do |row|
      row.sort_by! { |cell| cell['text_x'].to_f }
    end

    @values_row = align_values(@values_row, @bg_row)

    # Normalize missing/blank values to "-" (a nil text no longer raises).
    @values_row.each { |cell| cell['text'] = '-' if cell['text'].to_s.strip.empty? }
  end

  # @return [Hash, nil] nil when the table was not located; otherwise a hash
  #   with :grade_rows, :bg_pairs (midpoint x => [B cell, G cell]) and
  #   :ews_numbers (midpoint x => [boys cell or nil, girls cell or nil])
  def read
    return nil unless @grades_row && @bg_row && @values_row

    bg_pairs = {}
    @bg_row.each_slice(2) do |pair|
      next unless pair.size == 2 && pair[0] && pair[1] # Skip incomplete pairs

      b, g = pair
      x_mid = (b['text_x'].to_f + g['text_x'].to_f) / 2
      bg_pairs[x_mid] = [b, g]
    end

    {
      grade_rows: @grades_row,
      bg_pairs: bg_pairs,
      ews_numbers: match_numbers_to_pairs(@values_row, bg_pairs),
    }
  end

  private

  # Returns one value cell per B and per G header, in header order. A header
  # with no value cell within 10.0 of its text_x gets a "-" placeholder.
  # A trailing unpaired header is skipped, matching what #read does.
  def align_values(values, bg_row)
    bg_row.each_slice(2).flat_map do |b, g|
      next [] unless b && g

      [b, g].map do |header|
        x = header['text_x'].to_f
        values.find { |cell| (cell['text_x'].to_f - x).abs < 10.0 } || { 'text' => '-', 'text_x' => x }
      end
    end
  end

  # For every B/G pair picks the first remaining value whose text_x is within
  # +threshold+ of the B header, then of the G header.
  def match_numbers_to_pairs(remaining_numbers, bg_pairs, threshold = 10.0)
    numbers = {}
    remaining = remaining_numbers.dup

    bg_pairs.each do |x_mid, bg_pair|
      next unless bg_pair && bg_pair.size == 2 # Skip invalid pairs

      b_x = bg_pair[0]['text_x'].to_f
      g_x = bg_pair[1]['text_x'].to_f

      b_num = remaining.find { |cell| (cell['text_x'].to_f - b_x).abs < threshold }
      remaining.delete(b_num) if b_num

      g_num = remaining.find { |cell| (cell['text_x'].to_f - g_x).abs < threshold }
      remaining.delete(g_num) if g_num

      numbers[x_mid] = [b_num, g_num]
    end

    numbers
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'cgi'

# Renders the EWS enrolment table (a single "EWS" row, two columns per grade)
# as a standalone HTML file.
class EwsHtmlWriter
  # Writes the HTML table for +data+ to +html_path+.
  #
  # @param data [Hash, nil] hash with :grade_rows, :bg_pairs and :ews_numbers;
  #   nil makes this a no-op
  # @param html_path [String] destination file
  # @return [Integer, nil] result of File.write, or nil when +data+ is nil
  def self.generate_html(data, html_path)
    return unless data

    grade_rows = data[:grade_rows]
    bg_pairs = data[:bg_pairs]

    categories = [
      ['EWS', data[:ews_numbers]],
    ]

    table_rows = categories.map do |category, numbers|
      cells = bg_pairs.map do |x_mid, _|
        # Missing numbers yield empty cells instead of raising.
        nums = numbers&.[](x_mid)
        "<td>#{cell_text(nums&.first)}</td><td>#{cell_text(nums&.last)}</td>"
      end.join

      " <tr>\n" \
      " <td class=\"category\">#{category}</td>\n" \
      " #{cells}\n" \
      " </tr>"
    end.join("\n")

    # Header and cell text comes straight from the PDF, so it is escaped.
    grade_headers = grade_rows.map { |row| "<th colspan='2'>#{CGI.escapeHTML(row['text'].to_s)}</th>" }.join
    bg_headers = grade_rows.map { |_| "<td>B</td><td>G</td>" }.join

    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
        <title>EWS Enrollment Table</title>
        <style>
          table { border-collapse: collapse; margin-top: 20px; width: 100%; }
          th, td { border: 1px solid black; padding: 8px; text-align: center; }
          .header { font-weight: bold; background-color: #f0f0f0; }
          .grade { font-weight: bold; background-color: #e0e0e0; }
          .bg-pair { background-color: #f8f8f8; }
          .category { font-weight: bold; text-align: left; }
        </style>
      </head>
      <body>
        <h2>Enrolment (Economically Weaker Section)</h2>
        <table>
          <tr class="grade">
            <th rowspan="2">Category</th>
            #{grade_headers}
          </tr>
          <tr class="bg-pair">
            #{bg_headers}
          </tr>
          #{table_rows}
        </table>
      </body>
      </html>
    HTML

    File.write(html_path, html_content)
  end

  # HTML-escaped text of a cell, or an empty string when the cell is absent.
  def self.cell_text(cell)
    cell ? CGI.escapeHTML(cell['text'].to_s) : ''
  end
  private_class_method :cell_text
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Converts the EWS enrolment table into a nested, YAML-friendly hash:
# { 'ews' => { grade_name => { 'boys' => Integer|nil, 'girls' => Integer|nil } } }.
class EwsYamlWriter
  # @param data [Hash, nil] hash with :grade_rows, :bg_pairs and :ews_numbers
  # @return [Hash, nil] nested hash described on the class, nil when +data+ is nil
  def self.format_yaml(data)
    return unless data

    # Missing header information produces an empty category, not a NoMethodError.
    grade_rows = data[:grade_rows] || []
    bg_pairs = data[:bg_pairs] || {}

    categories = {
      'ews' => data[:ews_numbers],
    }

    categories.to_h do |category, numbers|
      by_grade = {}
      # The i-th B/G pair is labelled with the i-th grade header; pairs
      # without a grade header (or whose header has no text) are skipped.
      bg_pairs.each_with_index do |(x_mid, _), index|
        grade_text = grade_rows[index] && grade_rows[index]['text']
        next unless grade_text

        nums = numbers&.[](x_mid)
        by_grade[grade_text.downcase.tr(' ', '_')] = {
          'boys' => to_count(nums&.first),
          'girls' => to_count(nums&.last)
        }
      end
      [category, by_grade]
    end
  end

  # Integer value of a cell's text; nil when the cell or its text is missing or blank.
  # NOTE(review): non-numeric text such as "-" becomes 0 through String#to_i;
  # confirm that 0 (rather than nil) is the intended output for placeholders.
  def self.to_count(cell)
    text = cell&.[]('text')&.strip
    text.nil? || text.empty? ? nil : text.to_i
  end
  private_class_method :to_count
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Declares which labelled fields make up the "location" section of the report.
# NOTE(review): the extraction logic itself is presumably supplied by
# DataReaderBase using FIELD_MAPPINGS — confirm against that module.
class LocationDataReader
  include DataReaderBase

  # Field label => extraction settings.
  #   key_path:    nested keys under which the value is stored
  #   end_pattern: optional regex; presumably marks where the value ends
  #                (typically the next label) — confirm in DataReaderBase
  # Frozen so the shared constant cannot be modified by accident.
  FIELD_MAPPINGS = {
    'State' => {
      key_path: ['location', 'state'],
      end_pattern: /District/
    },
    'District' => {
      key_path: ['location', 'district'],
      end_pattern: /Block/
    },
    'Block' => {
      key_path: ['location', 'block'],
      end_pattern: /Rural/
    },
    'Rural / Urban' => {
      key_path: ['location', 'area_type'],
      end_pattern: /Cluster/
    },
    'Pincode' => {
      key_path: ['location', 'pincode']
    },
    'Ward' => {
      key_path: ['location', 'ward'],
      end_pattern: /Mohalla/
    },
    'Cluster' => {
      key_path: ['location', 'cluster'],
      end_pattern: /Ward/
    },
    'Municipality' => {
      key_path: ['location', 'municipality'],
      end_pattern: /Assembly/
    },
    'Assembly Const.' => {
      key_path: ['location', 'assembly_constituency'],
      end_pattern: /Parl/
    },
    'Parl. Constituency' => {
      key_path: ['location', 'parliamentary_constituency'],
      end_pattern: /School/
    }
  }.freeze
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Declares which labelled fields make up the "official" section of the report
# (establishment / recognition years, affiliation boards, management).
# NOTE(review): the extraction logic itself is presumably supplied by
# DataReaderBase using FIELD_MAPPINGS — confirm against that module.
class OfficialDataReader
  include DataReaderBase

  # Field label => extraction settings.
  #   key_path:    nested keys under which the value is stored
  #   value_type:  optional; :integer presumably requests integer conversion
  #   end_pattern: optional regex; presumably marks where the value ends
  # (the last two are interpreted by DataReaderBase — confirm there).
  # Frozen so the shared constant cannot be modified by accident.
  FIELD_MAPPINGS = {
    'Year of Establishment' => {
      key_path: ['official', 'established'],
      value_type: :integer
    },
    'Year of Recognition-Pri.' => {
      key_path: ['official', 'recognition', 'primary'],
      value_type: :integer
    },
    'Year of Recognition-Upr.Pri.' => {
      key_path: ['official', 'recognition', 'upper_primary'],
      value_type: :integer
    },
    'Year of Recognition-Sec.' => {
      key_path: ['official', 'recognition', 'secondary'],
      value_type: :integer
    },
    'Year of Recognition-Higher Sec.' => {
      key_path: ['official', 'recognition', 'higher_secondary'],
      value_type: :integer
    },
    'Affiliation Board-Sec' => {
      key_path: ['official', 'affiliation', 'secondary'],
      end_pattern: /Affiliation Board-HSec/
    },
    'Affiliation Board-HSec' => {
      key_path: ['official', 'affiliation', 'higher_secondary'],
      end_pattern: /Is this/
    },
    'School Management' => {
      key_path: ['official', 'management'],
      end_pattern: /School Type/
    }
  }.freeze
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Extracts positioned text blocks (BT ... ET sections) from the raw content
# stream of every page of a PDF reader object.
class PDFBlockExtractor
  # "1 0 0 1 x y Tm": text matrix carrying the block position.
  TEXT_MATRIX = /1\s+0\s+0\s+1\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+Tm/
  # "/F<n> <size> Tf": font selection.
  FONT = %r{/F(\d+)\s+(\d+(\.\d+)?)\s+Tf}
  # "(text) Tj": show-text operator.
  SHOW_TEXT = /\((.*?)\)\s*Tj/
  # A parenthesised string literal, honouring backslash escapes.
  STRING_LITERAL = /\((?:\\.|[^\\)])*\)/

  # @param reader [#pages] object whose pages respond to #raw_content
  # @return [Array<Hash>] one hash per non-empty text block that has
  #   coordinates, with keys :page (1-based), :start_line, :text (all Tj
  #   strings joined by a space), :x, :y, :end_line and, when a Tf operator
  #   was seen, :font and :font_size
  def self.extract_blocks(reader)
    blocks = []

    reader.pages.each_with_index do |page, index|
      page_number = index + 1
      current_block = nil

      page.raw_content.each_line do |line|
        # Look for BT/ET only outside string literals and only as whole
        # tokens, so text such as "(SUBTOTAL) Tj" is not taken for an operator.
        operators = line.gsub(STRING_LITERAL, '')

        if operators.match?(/\bBT\b/)
          current_block = {
            page: page_number,
            start_line: line.strip,
            text: [] # collects every Tj string of the block
          }
        elsif current_block.nil?
          # Operators outside a BT ... ET section are ignored.
          next
        elsif (position = line.match(TEXT_MATRIX))
          # Only the first position of a block is kept.
          unless current_block[:x] && current_block[:y]
            current_block[:x] = position[1].to_f
            current_block[:y] = position[2].to_f
          end
        elsif (font = line.match(FONT))
          # Only the first font of a block is kept.
          unless current_block[:font] && current_block[:font_size]
            current_block[:font] = "F#{font[1]}"
            current_block[:font_size] = font[2].to_f
          end
        elsif (shown = line.match(SHOW_TEXT))
          # Drop the backslashes used to escape characters inside the literal.
          current_block[:text] << shown[1].gsub(/\\/, '')
        elsif operators.match?(/\bET\b/)
          current_block[:end_line] = line.strip
          current_block[:text] = current_block[:text].join(' ')
          # Only keep non-empty blocks that have coordinates.
          if !current_block[:text].empty? && current_block[:x] && current_block[:y]
            blocks << current_block
          end
          current_block = nil
        end
      end
    end

    blocks
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Reduces a raw PDF content stream to its shown text, one entry per line,
# re-joining a fixed set of words that the PDF splits across several Tj
# operators ("Non"+"Residenti"+"al", "Digit"+"al Facil"+"ities").
class PDFContentCompressor
  # "(text) Tj": show-text operator.
  SHOW_TEXT = /\((.*?)\)\s*Tj/
  # Fragments that are appended to the text collected so far instead of
  # starting a new entry.
  SPLIT_FRAGMENT = /^(?:Non|Residenti|al|Digit|al Facil|ities)$/
  # A parenthesised string literal, honouring backslash escapes.
  STRING_LITERAL = /\((?:\\.|[^\\)])*\)/

  # @param content [String] raw content stream
  # @return [String] the extracted texts joined by newlines; text outside
  #   BT ... ET sections is ignored
  def self.compress(content)
    compressed = []
    in_bt_block = false
    current_text = +''

    content.each_line do |line|
      # Look for BT/ET only outside string literals and only as whole tokens,
      # so text such as "(TOILET) Tj" is not taken for an ET operator.
      operators = line.gsub(STRING_LITERAL, '')

      if operators.match?(/\bBT\b/)
        in_bt_block = true
        current_text = +''
      elsif operators.match?(/\bET\b/)
        in_bt_block = false
        compressed << current_text unless current_text.empty?
        # Start afresh so a stray second ET cannot emit the same text twice.
        current_text = +''
      elsif in_bt_block && (shown = line.match(SHOW_TEXT))
        text = shown[1].strip
        if text.match?(SPLIT_FRAGMENT)
          # Known fragment: glue it onto the text collected so far.
          current_text << text
        else
          # Regular text: flush what was collected and start a new entry.
          compressed << current_text unless current_text.empty?
          current_text = text.dup
        end
      end
    end

    compressed.reject(&:empty?).join("\n")
  end
end
|