udise_school_report_reader 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +20 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/lib/udise_school_report_reader/activities_data_reader.rb +58 -0
- data/lib/udise_school_report_reader/anganwadi_data_reader.rb +22 -0
- data/lib/udise_school_report_reader/basic_info_data_reader.rb +29 -0
- data/lib/udise_school_report_reader/block_rectangle_combiner.rb +115 -0
- data/lib/udise_school_report_reader/building_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/characteristics_reader.rb +28 -0
- data/lib/udise_school_report_reader/csv_writer.rb +75 -0
- data/lib/udise_school_report_reader/data_reader_base.rb +86 -0
- data/lib/udise_school_report_reader/digital_facilities_data_reader.rb +42 -0
- data/lib/udise_school_report_reader/enrollment_data_reader.rb +136 -0
- data/lib/udise_school_report_reader/enrollment_html_writer.rb +81 -0
- data/lib/udise_school_report_reader/enrollment_yaml_writer.rb +62 -0
- data/lib/udise_school_report_reader/ews_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/ews_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/ews_yaml_writer.rb +31 -0
- data/lib/udise_school_report_reader/location_data_reader.rb +47 -0
- data/lib/udise_school_report_reader/official_data_reader.rb +40 -0
- data/lib/udise_school_report_reader/pdf_block_extractor.rb +49 -0
- data/lib/udise_school_report_reader/pdf_content_compressor.rb +36 -0
- data/lib/udise_school_report_reader/pdf_rectangle_extractor.rb +53 -0
- data/lib/udise_school_report_reader/rooms_data_reader.rb +36 -0
- data/lib/udise_school_report_reader/rte_data_reader.rb +118 -0
- data/lib/udise_school_report_reader/rte_html_writer.rb +63 -0
- data/lib/udise_school_report_reader/rte_yaml_writer.rb +61 -0
- data/lib/udise_school_report_reader/sanitation_data_reader.rb +56 -0
- data/lib/udise_school_report_reader/school_report_parser.rb +295 -0
- data/lib/udise_school_report_reader/teacher_data_reader.rb +204 -0
- data/lib/udise_school_report_reader/version.rb +3 -0
- data/lib/udise_school_report_reader.rb +41 -0
- data/test/school_report_parser_test.rb +62 -0
- metadata +165 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
# Reads the enrolment table from page 2 of an extracted-cells CSV.
#
# Only the `page`, `text`, `rect_x`, `rect_y`, `text_x` and `text_y` columns
# of the CSV are consulted. Rows are CSV::Row objects and are passed through
# to the result unchanged.
class EnrollmentDataReader
  SOCIAL_CATEGORIES = [
    { key: 'gen', label: 'Gen' },
    { key: 'sc', label: 'SC' },
    { key: 'st', label: 'ST' },
    { key: 'obc', label: 'OBC' }
  ].freeze

  RELIGION_CATEGORIES = [
    { key: 'musl', label: 'Musl' },
    { key: 'chris', label: 'Chris' },
    { key: 'sikh', label: 'Sikh' },
    { key: 'budd', label: 'Budd' },
    { key: 'parsi', label: 'Parsi' },
    { key: 'jain', label: 'Jain' },
    { key: 'others', label: 'Others' }
  ].freeze

  OTHER_CATEGORIES = [
    { key: 'aadh', label: 'Aadh' },
    { key: 'bpl', label: 'BPL' },
    { key: 'rept', label: 'Rept' },
    { key: 'cwsn', label: 'CWSN' }
  ].freeze

  AGE_CATEGORIES = (3..22).map do |age|
    { key: "age_#{age}", label: age == 3 ? '>3' : age.to_s }
  end.freeze

  ALL_CATEGORIES = [
    SOCIAL_CATEGORIES,
    RELIGION_CATEGORIES,
    OTHER_CATEGORIES,
    AGE_CATEGORIES
  ].flatten.freeze

  # Header texts recognised as grade columns and as boy/girl sub-columns.
  GRADE_LABELS = %w[Pre-Pr I II III IV V VI VII VIII IX X XI XII].freeze
  GENDER_LABELS = %w[B G].freeze

  # Convenience wrapper: build a reader for +csv_path+ and read it.
  def self.read(csv_path) = new(csv_path).read

  # @param csv_path [String] path of the extracted-cells CSV
  def initialize(csv_path)
    @csv_path = csv_path
    # nil means "no 'Total' header seen": nothing is cut off on the right.
    # (Starting at 0 would make `text_x >= cutoff` true for every row and
    # silently discard the whole table.)
    @x_cutoff = nil
    @category_y_coords = {}
  end

  # Parses the CSV and groups the page-2 cells.
  #
  # @return [Hash, nil] nil when no grade header row was found; otherwise a
  #   hash with :grade_rows (Array of rows), :bg_pairs (Hash of x-midpoint =>
  #   [B row, G row]) and one :"<key>_numbers" entry per ALL_CATEGORIES key
  #   (Hash of x-midpoint => [boys row or nil, girls row or nil]).
  def read
    # Single pass over the file; both scans below work on the same rows.
    page_rows = CSV.foreach(@csv_path, headers: true).select { |row| row['page'] == '2' }
    collect_category_y_coords(page_rows)

    grade_rows = []
    bg_rows = []
    category_rows = ALL_CATEGORIES.to_h { |category| [category[:key], []] }

    page_rows.each do |row|
      text = row['text']
      @x_cutoff = row['rect_x'].to_f if text == 'Total' && row['rect_y'].to_f == 778.0

      if GRADE_LABELS.include?(text)
        grade_rows << row if row['text_y'].to_f == 780.0
      elsif GENDER_LABELS.include?(text)
        bg_rows << row if row['text_y'].to_f == 768.0
      elsif text&.match?(/^\d+$/)
        assign_to_categories(row, category_rows)
      end
    end

    return nil if grade_rows.empty?

    [grade_rows, bg_rows, *category_rows.values].each { |rows| sort_and_trim!(rows) }

    bg_pairs = pair_up(bg_rows)
    result = { grade_rows: grade_rows, bg_pairs: bg_pairs }
    ALL_CATEGORIES.each do |category|
      key = category[:key]
      result[:"#{key}_numbers"] = match_numbers_to_pairs(category_rows[key], bg_pairs)
    end
    result
  end

  private

  # Records the rect_y of every category label found in the column whose
  # rect_x lies within 5 units of 27. Empty text cells are tolerated.
  def collect_category_y_coords(page_rows)
    keys_by_label = ALL_CATEGORIES.to_h { |category| [category[:label].downcase, category[:key]] }

    page_rows.each do |row|
      next unless (row['rect_x'].to_f - 27.0).abs < 5.0

      key = keys_by_label[row['text'].to_s.downcase]
      @category_y_coords[key] = row['rect_y'].to_f if key
    end
  end

  # Adds a numeric cell to every category whose label y is within 5 units.
  def assign_to_categories(row, category_rows)
    y_coord = row['rect_y'].to_f
    @category_y_coords.each do |key, category_y|
      category_rows[key] << row if (y_coord - category_y).abs < 5.0
    end
  end

  # Orders rows left-to-right and drops those at or beyond the cutoff, if any.
  def sort_and_trim!(rows)
    rows.sort_by! { |row| row['text_x'].to_f }
    rows.reject! { |row| row['text_x'].to_f >= @x_cutoff } if @x_cutoff
  end

  # Groups consecutive B,G header rows, keyed by the midpoint of their x
  # positions. A trailing header without a partner is skipped.
  def pair_up(bg_rows)
    bg_rows.each_slice(2).filter_map do |b, g|
      next unless g

      [(b['text_x'].to_f + g['text_x'].to_f) / 2, [b, g]]
    end.to_h
  end

  # For each B/G pair, picks the first remaining number whose text_x is within
  # +threshold+ of the B header and then of the G header. A number is used at
  # most once.
  def match_numbers_to_pairs(remaining_numbers, bg_pairs, threshold = 10.0)
    numbers = {}
    remaining = remaining_numbers.dup

    bg_pairs.each do |x_mid, bg_pair|
      b_x = bg_pair[0]['text_x'].to_f
      g_x = bg_pair[1]['text_x'].to_f

      b_num = remaining.find { |row| (row['text_x'].to_f - b_x).abs < threshold }
      remaining.delete(b_num) if b_num

      g_num = remaining.find { |row| (row['text_x'].to_f - g_x).abs < threshold }
      remaining.delete(g_num) if g_num

      numbers[x_mid] = [b_num, g_num]
    end

    numbers
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# Renders the enrolment data as a standalone HTML table.
class EnrollmentHtmlWriter
  # Writes an HTML document to +html_path+.
  #
  # @param data [Hash, nil] needs :grade_rows and :bg_pairs; each
  #   :"<key>_numbers" entry is optional and, when absent, its row is
  #   rendered with empty cells instead of raising
  # @param html_path [String] destination file
  # @return [Integer, nil] result of File.write, or nil when +data+ is nil
  def self.generate_html(data, html_path)
    return unless data

    grade_rows = data[:grade_rows]
    bg_pairs = data[:bg_pairs]

    categories = [
      ['Gen', data[:gen_numbers]],
      ['SC', data[:sc_numbers]],
      ['ST', data[:st_numbers]],
      ['OBC', data[:obc_numbers]],
      ['Muslim', data[:musl_numbers]],
      ['Christian', data[:chris_numbers]],
      ['Sikh', data[:sikh_numbers]],
      ['Buddhist', data[:budd_numbers]],
      ['Parsi', data[:parsi_numbers]],
      ['Jain', data[:jain_numbers]],
      ['Others', data[:others_numbers]],
      ['Aadhaar', data[:aadh_numbers]],
      ['BPL', data[:bpl_numbers]],
      ['Repeater', data[:rept_numbers]],
      ['CWSN', data[:cwsn_numbers]]
    ]

    ages = (3..22).map { |age| ["Age #{age}", data[:"age_#{age}_numbers"]] }

    # One <tr> per category/age, with a boys and a girls cell per B/G pair.
    table_rows = (categories + ages).map do |category, numbers|
      cells = bg_pairs.map do |x_mid, _|
        # Nil-safe lookup: a category missing from +data+ yields blank cells.
        nums = numbers&.[](x_mid)
        b_num = nums&.first
        g_num = nums&.last
        "<td>#{b_num ? b_num['text'] : ''}</td><td>#{g_num ? g_num['text'] : ''}</td>"
      end.join

      " <tr>\n" \
        " <td class=\"category\">#{category}</td>\n" \
        " #{cells}\n" \
        " </tr>"
    end.join("\n")

    grade_headers = grade_rows.map { |row| "<th colspan='2'>#{row['text']}</th>" }.join
    bg_headers = grade_rows.map { |_| "<td>B</td><td>G</td>" }.join

    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
      <title>Enrollment Table</title>
      <style>
      table { border-collapse: collapse; margin-top: 20px; width: 100%; }
      th, td { border: 1px solid black; padding: 8px; text-align: center; }
      .header { font-weight: bold; background-color: #f0f0f0; }
      .grade { font-weight: bold; background-color: #e0e0e0; }
      .bg-pair { background-color: #f8f8f8; }
      .category { font-weight: bold; text-align: left; }
      </style>
      </head>
      <body>
      <h2>Enrolment (By Social Category)</h2>
      <table>
      <tr class="grade">
      <th rowspan="2">Category</th>
      #{grade_headers}
      </tr>
      <tr class="bg-pair">
      #{bg_headers}
      </tr>
      #{table_rows}
      </table>
      </body>
      </html>
    HTML

    File.write(html_path, html_content)
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Converts the enrolment reader output into a plain nested hash.
class EnrollmentYamlWriter
  # Builds { section => { grade => { 'boys' => Integer|nil, 'girls' => Integer|nil } } }.
  #
  # Sections are the fifteen named categories followed by age_3 .. age_22.
  # The grade name is the grade header text, lower-cased with spaces turned
  # into underscores; grade headers are paired with B/G pairs by position.
  #
  # @param data [Hash, nil] reader output (:grade_rows, :bg_pairs, :*_numbers)
  # @return [Hash, nil] nil when +data+ is nil/false
  def self.format_yaml(data)
    return unless data

    grades = data[:grade_rows]
    pairs = data[:bg_pairs]

    # Output section name => key under which its numbers live in +data+.
    sections = [
      ['gen', :gen_numbers],
      ['sc', :sc_numbers],
      ['st', :st_numbers],
      ['obc', :obc_numbers],
      ['muslim', :musl_numbers],
      ['christian', :chris_numbers],
      ['sikh', :sikh_numbers],
      ['buddhist', :budd_numbers],
      ['parsi', :parsi_numbers],
      ['jain', :jain_numbers],
      ['others', :others_numbers],
      ['aadhaar', :aadh_numbers],
      ['bpl', :bpl_numbers],
      ['repeater', :rept_numbers],
      ['cwsn', :cwsn_numbers]
    ]
    sections += (3..22).map { |age| ["age_#{age}", :"age_#{age}_numbers"] }

    # Missing cell, missing text or blank text => nil; otherwise String#to_i.
    to_count = lambda do |cell|
      raw = cell&.[]('text')&.strip
      raw.to_i unless raw.nil? || raw.empty?
    end

    sections.each_with_object({}) do |(section, source_key), output|
      lookup = data[source_key]
      per_grade = {}

      pairs.each_with_index do |(x_mid, _pair), position|
        label = grades[position] && grades[position]['text']
        next unless label

        found = lookup&.[](x_mid)
        per_grade[label.downcase.gsub(' ', '_')] = {
          'boys' => to_count.call(found&.first),
          'girls' => to_count.call(found&.last)
        }
      end

      output[section] = per_grade
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# Reads the EWS enrolment table from page 1 of an extracted-cells CSV.
#
# Cells are grouped by rect_y. The three rows found directly below the title
# cell (lower rect_y, nearest first) are taken as the grade header row, the
# B/G header row and the values row.
class EwsDataReader
  GRADES = [
    'Pre-Pri.', 'Class I', 'Class II', 'Class III', 'Class IV', 'Class V',
    'Class VI', 'Class VII', 'Class VIII', 'Class IX', 'Class X', 'Class XI', 'Class XII'
  ].freeze

  # Convenience wrapper: build a reader for +csv_path+ and read it.
  def self.read(csv_path) = new(csv_path).read

  # Loads the CSV and locates the table rows. When the title or the three
  # rows below it cannot be found the row variables stay nil and #read
  # returns nil.
  #
  # @param csv_path [String] path of the extracted-cells CSV
  def initialize(csv_path)
    @csv_path = csv_path
    @rows = Hash.new { |h, k| h[k] = [] }

    # Group page-1 cells by their rect_y.
    CSV.foreach(@csv_path, headers: true) do |cell|
      next unless cell['page'] == '1'

      @rows[cell['rect_y'].to_f] << cell
    end

    @title_row = @rows.find do |_, cells|
      cells.any? { |cell| cell&.dig('text')&.include?('Total no. of Economically Weaker Section*(EWS) students Enrolled in Schools') }
    end

    title_y = @title_row&.first
    return unless title_y

    # Rows with a smaller rect_y than the title, nearest to the title first.
    rows_after_title = @rows.select { |y, _| y < title_y }.sort_by(&:first).reverse
    return unless rows_after_title.size >= 3

    @grades_row, @bg_row, @values_row = rows_after_title.first(3).map(&:last)

    [@grades_row, @bg_row].each { |row| row.sort_by! { |cell| cell['text_x'].to_f } }

    @values_row = align_values_to_pairs(@values_row, @bg_row)

    # Blank or missing text becomes "-" (to_s guards empty CSV fields, which
    # CSV returns as nil).
    @values_row.each { |cell| cell['text'] = '-' if cell['text'].to_s.strip.empty? }
  end

  # @return [Hash, nil] nil when the table was not located; otherwise
  #   :grade_rows, :bg_pairs (x-midpoint => [B cell, G cell]) and
  #   :ews_numbers (x-midpoint => [boys cell or nil, girls cell or nil])
  def read
    return nil unless @grades_row && @bg_row && @values_row

    bg_pairs = {}
    @bg_row.each_slice(2) do |pair|
      next unless pair.size == 2 && pair[0] && pair[1] # Skip incomplete pairs

      b, g = pair
      x_mid = (b['text_x'].to_f + g['text_x'].to_f) / 2
      bg_pairs[x_mid] = [b, g]
    end

    {
      grade_rows: @grades_row,
      bg_pairs: bg_pairs,
      ews_numbers: match_numbers_to_pairs(@values_row, bg_pairs)
    }
  end

  private

  # Produces one value cell per B and per G header, in header order: the
  # first value whose text_x lies within +threshold+ of the header, or a
  # placeholder "-" cell. A trailing header without a partner is skipped,
  # matching what #read does when it builds the pairs.
  def align_values_to_pairs(values, bg_row, threshold = 10.0)
    bg_row.each_slice(2).flat_map do |b, g|
      next [] unless g

      [b, g].map do |header|
        header_x = header['text_x'].to_f
        values.find { |cell| (cell['text_x'].to_f - header_x).abs < threshold } ||
          { 'text' => '-', 'text_x' => header_x }
      end
    end
  end

  # For each B/G pair, picks the first remaining value whose text_x is within
  # +threshold+ of the B header and then of the G header.
  def match_numbers_to_pairs(remaining_numbers, bg_pairs, threshold = 10.0)
    numbers = {}
    remaining = remaining_numbers.dup

    bg_pairs.each do |x_mid, bg_pair|
      next unless bg_pair && bg_pair.size == 2 # Skip invalid pairs

      b_x = bg_pair[0]['text_x'].to_f
      g_x = bg_pair[1]['text_x'].to_f

      b_num = remaining.find { |cell| (cell['text_x'].to_f - b_x).abs < threshold }
      remaining.delete(b_num) if b_num

      g_num = remaining.find { |cell| (cell['text_x'].to_f - g_x).abs < threshold }
      remaining.delete(g_num) if g_num

      numbers[x_mid] = [b_num, g_num]
    end

    numbers
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Renders the EWS data as a standalone HTML table.
class EwsHtmlWriter
  # Writes an HTML document to +html_path+.
  #
  # @param data [Hash, nil] needs :grade_rows and :bg_pairs; :ews_numbers is
  #   optional and, when absent, the row is rendered with empty cells
  # @param html_path [String] destination file
  # @return [Integer, nil] result of File.write, or nil when +data+ is nil
  def self.generate_html(data, html_path)
    return unless data

    grade_rows = data[:grade_rows]
    bg_pairs = data[:bg_pairs]

    categories = [
      ['EWS', data[:ews_numbers]]
    ]

    # One <tr> per category, with a boys and a girls cell per B/G pair.
    table_rows = categories.map do |category, numbers|
      cells = bg_pairs.map do |x_mid, _|
        # Nil-safe lookup, mirroring EwsYamlWriter's `numbers&.[]`.
        nums = numbers&.[](x_mid)
        b_num = nums&.first
        g_num = nums&.last
        "<td>#{b_num ? b_num['text'] : ''}</td><td>#{g_num ? g_num['text'] : ''}</td>"
      end.join

      " <tr>\n" \
        " <td class=\"category\">#{category}</td>\n" \
        " #{cells}\n" \
        " </tr>"
    end.join("\n")

    grade_headers = grade_rows.map { |row| "<th colspan='2'>#{row['text']}</th>" }.join
    bg_headers = grade_rows.map { |_| "<td>B</td><td>G</td>" }.join

    html_content = <<~HTML
      <!DOCTYPE html>
      <html>
      <head>
      <title>Enrollment Table</title>
      <style>
      table { border-collapse: collapse; margin-top: 20px; width: 100%; }
      th, td { border: 1px solid black; padding: 8px; text-align: center; }
      .header { font-weight: bold; background-color: #f0f0f0; }
      .grade { font-weight: bold; background-color: #e0e0e0; }
      .bg-pair { background-color: #f8f8f8; }
      .category { font-weight: bold; text-align: left; }
      </style>
      </head>
      <body>
      <h2>Enrolment (By Social Category)</h2>
      <table>
      <tr class="grade">
      <th rowspan="2">Category</th>
      #{grade_headers}
      </tr>
      <tr class="bg-pair">
      #{bg_headers}
      </tr>
      #{table_rows}
      </table>
      </body>
      </html>
    HTML

    File.write(html_path, html_content)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Converts the EWS reader output into a plain nested hash.
class EwsYamlWriter
  # Builds { 'ews' => { grade => { 'boys' => Integer|nil, 'girls' => Integer|nil } } }.
  #
  # The grade name is the grade header text, lower-cased with spaces turned
  # into underscores; grade headers are paired with B/G pairs by position.
  #
  # @param data [Hash, nil] reader output (:grade_rows, :bg_pairs, :ews_numbers)
  # @return [Hash, nil] nil when +data+ is nil/false
  def self.format_yaml(data)
    return unless data

    grades = data[:grade_rows]
    pairs = data[:bg_pairs]

    # Missing cell, missing text or blank text => nil; otherwise String#to_i.
    to_count = lambda do |cell|
      raw = cell&.[]('text')&.strip
      raw.to_i unless raw.nil? || raw.empty?
    end

    { 'ews' => data[:ews_numbers] }.each_with_object({}) do |(section, lookup), output|
      per_grade = {}

      pairs.each_with_index do |(x_mid, _pair), position|
        label = grades[position] && grades[position]['text']
        next unless label

        found = lookup&.[](x_mid)
        per_grade[label.downcase.gsub(' ', '_')] = {
          'boys' => to_count.call(found&.first),
          'girls' => to_count.call(found&.last)
        }
      end

      output[section] = per_grade
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Field table for the location entries of the report.
class LocationDataReader
  include DataReaderBase

  # Label text => options. :key_path is the list of nested keys for the value;
  # :end_pattern, where present, is a regex (presumably marking where the
  # value stops -- the consuming logic lives in DataReaderBase, confirm there).
  FIELD_MAPPINGS = {
    'State' => { key_path: %w[location state], end_pattern: /District/ },
    'District' => { key_path: %w[location district], end_pattern: /Block/ },
    'Block' => { key_path: %w[location block], end_pattern: /Rural/ },
    'Rural / Urban' => { key_path: %w[location area_type], end_pattern: /Cluster/ },
    'Pincode' => { key_path: %w[location pincode] },
    'Ward' => { key_path: %w[location ward], end_pattern: /Mohalla/ },
    'Cluster' => { key_path: %w[location cluster], end_pattern: /Ward/ },
    'Municipality' => { key_path: %w[location municipality], end_pattern: /Assembly/ },
    'Assembly Const.' => { key_path: %w[location assembly_constituency], end_pattern: /Parl/ },
    'Parl. Constituency' => { key_path: %w[location parliamentary_constituency], end_pattern: /School/ }
  }
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require_relative 'data_reader_base'
|
2
|
+
|
3
|
+
# Field table for the official/recognition entries of the report.
class OfficialDataReader
  include DataReaderBase

  # Label text => options. :key_path is the list of nested keys for the value;
  # :value_type and :end_pattern are hints whose handling lives in
  # DataReaderBase (not visible here -- confirm their exact semantics there).
  FIELD_MAPPINGS = {
    'Year of Establishment' => { key_path: %w[official established], value_type: :integer },
    'Year of Recognition-Pri.' => { key_path: %w[official recognition primary], value_type: :integer },
    'Year of Recognition-Upr.Pri.' => { key_path: %w[official recognition upper_primary], value_type: :integer },
    'Year of Recognition-Sec.' => { key_path: %w[official recognition secondary], value_type: :integer },
    'Year of Recognition-Higher Sec.' => { key_path: %w[official recognition higher_secondary], value_type: :integer },
    'Affiliation Board-Sec' => { key_path: %w[official affiliation secondary], end_pattern: /Affiliation Board-HSec/ },
    'Affiliation Board-HSec' => { key_path: %w[official affiliation higher_secondary], end_pattern: /Is this/ },
    'School Management' => { key_path: %w[official management], end_pattern: /School Type/ }
  }
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Extracts positioned text blocks from the raw content streams of a PDF.
class PDFBlockExtractor
  # `1 0 0 1 x y Tm` -- text matrix whose last two operands are captured.
  POSITION_PATTERN = /1\s+0\s+0\s+1\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+Tm/
  # `/F<n> <size> Tf` -- font selection.
  FONT_PATTERN = /\/F(\d+)\s+(\d+(\.\d+)?)\s+Tf/
  # `(string) Tj` -- shown text.
  SHOWN_TEXT_PATTERN = /\((.*?)\)\s*Tj/

  # Walks every page's raw content line by line and collects one hash per
  # BT..ET text object that has both text and a position.
  #
  # @param reader [#pages] object whose pages respond to #raw_content
  # @return [Array<Hash>] blocks with :page (1-based), :start_line, :text
  #   (shown strings joined by a space, backslashes removed), :x, :y,
  #   :end_line and, when a Tf line was seen, :font and :font_size
  def self.extract_blocks(reader)
    blocks = []

    reader.pages.each_with_index do |page, index|
      # nil while outside a BT..ET text object; lines there are ignored so a
      # stray ET/Tj can neither raise nor alter a block already emitted.
      current_block = nil

      page.raw_content.each_line do |line|
        # Look for BT only outside shown strings: text such as "(SUBTOTAL) Tj"
        # contains "BT" but must not start a new block.
        operators = line.gsub(SHOWN_TEXT_PATTERN, '')

        if operators.include?('BT')
          current_block = { page: index + 1, start_line: line.strip, text: [] }
        elsif current_block.nil?
          next
        elsif (position = line.match(POSITION_PATTERN))
          # Only the first position inside a block is kept.
          unless current_block[:x] && current_block[:y]
            current_block[:x] = position[1].to_f
            current_block[:y] = position[2].to_f
          end
        elsif (font = line.match(FONT_PATTERN))
          # Only the first font inside a block is kept.
          unless current_block[:font] && current_block[:font_size]
            current_block[:font] = "F#{font[1]}"
            current_block[:font_size] = font[2].to_f
          end
        elsif (shown = line.match(SHOWN_TEXT_PATTERN))
          current_block[:text] << shown[1].gsub(/\\/, '') # Remove escape characters
        elsif line.include?('ET')
          current_block[:end_line] = line.strip
          current_block[:text] = current_block[:text].join(' ')
          # Only keep non-empty blocks that have coordinates.
          blocks << current_block if !current_block[:text].empty? && current_block[:x] && current_block[:y]
          current_block = nil
        end
      end
    end

    blocks
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Reduces a PDF content stream to the text it shows, one entry per line.
class PDFContentCompressor
  # `(string) Tj` -- shown text.
  SHOWN_TEXT_PATTERN = /\((.*?)\)\s*Tj/
  # Fragments that are glued onto the preceding text instead of starting a
  # new entry (split words such as "Non" + "Residenti" + "al").
  SPLIT_FRAGMENT_PATTERN = /^(?:Non|Residenti|al|Digit|al Facil|ities)$/

  # @param content [String] raw content stream, one operator sequence per line
  # @return [String] the shown strings inside BT..ET objects, stripped and
  #   joined with "\n"; split fragments are concatenated to the text before
  #   them within the same BT..ET object
  def self.compress(content)
    compressed = []
    in_bt_block = false
    current_text = ''

    content.each_line do |line|
      # Detect BT/ET outside shown strings only, so text such as
      # "(TOILET) Tj" or "(SUBTOTAL) Tj" is kept as text rather than being
      # mistaken for an operator.
      operators = line.gsub(SHOWN_TEXT_PATTERN, '')

      if operators.include?('BT')
        in_bt_block = true
        current_text = ''
      elsif operators.include?('ET')
        in_bt_block = false
        compressed << current_text unless current_text.empty?
      elsif in_bt_block && (shown = line.match(SHOWN_TEXT_PATTERN))
        text = shown[1].strip
        if text.match?(SPLIT_FRAGMENT_PATTERN)
          # Non-mutating append: earlier entries in +compressed+ stay intact.
          current_text += text
        else
          compressed << current_text unless current_text.empty?
          current_text = text
        end
      end
    end

    compressed.reject(&:empty?).join("\n")
  end
end
|