looper 1.5.1__py3-none-any.whl → 1.6.0a1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
looper/html_reports.py DELETED
@@ -1,1057 +0,0 @@
1
- """ Generate HTML reports """
2
-
3
- import glob
4
- import logging
5
- import os
6
- import re
7
- import sys
8
- from copy import copy as cp
9
- from datetime import timedelta
10
-
11
- import jinja2
12
- import pandas as _pd
13
- from eido import read_schema
14
- from peppy.const import *
15
-
16
- from ._version import __version__ as v
17
- from .const import *
18
- from .processed_project import get_project_outputs
19
- from .utils import get_file_for_project_old
20
-
21
- _LOGGER = logging.getLogger("looper")
22
-
23
-
24
class HTMLReportBuilderOld(object):
    """
    Generate HTML summary report for project/samples

    Calling an instance renders the project index page together with the
    per-sample, per-object, parent-listing and status pages. Every page is
    rendered from a jinja template through ``render_jinja_template`` and
    written with ``save_html``.
    """

    def __init__(self, prj):
        """
        The Project defines the instance.

        :param Project prj: Project with which to work/operate on
        """
        super(HTMLReportBuilderOld, self).__init__()
        self.prj = prj
        self.j_env = get_jinja_env()
        self.reports_dir = get_file_for_project_old(self.prj, "reports")
        self.index_html_path = get_file_for_project_old(self.prj, "summary.html")
        self.index_html_filename = os.path.basename(self.index_html_path)
        self._outdir = self.prj.output_dir
        _LOGGER.debug("Reports dir: {}".format(self.reports_dir))

    def __call__(self, objs, stats, columns):
        """
        Do the work of the subcommand/program.

        :param pandas.DataFrame objs: project level dataframe containing
            any reported objects for all samples
        :param list[dict] stats: pipeline statistics for each analyzed sample
        :param list columns: all unique column names used in the stats file
        :return str: path to the produced index HTML page
        """
        # Two navbars are needed: links on the index page are relative to the
        # output dir, links on every other page are relative to the reports dir.
        navbar = self.create_navbar(
            self.create_navbar_links(objs=objs, stats=stats, wd=self._outdir),
            self.index_html_filename,
        )
        navbar_reports = self.create_navbar(
            self.create_navbar_links(objs=objs, stats=stats, wd=self.reports_dir),
            os.path.join(os.pardir, self.index_html_filename),
        )
        return self.create_index_html(
            objs,
            stats,
            columns,
            footer=self.create_footer(),
            navbar=navbar,
            navbar_reports=navbar_reports,
        )

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _page_filename(name):
        """
        Build the report file name for a sample or object key.

        :param name: sample name or object key
        :return str: lower-cased name with spaces replaced by underscores
            and the '.html' extension appended
        """
        return (str(name) + ".html").replace(" ", "_").lower()

    def _page_path(self, name):
        """Return the path of the report page for ``name`` in the reports dir."""
        return os.path.join(self.reports_dir, self._page_filename(name))

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path`` if it is missing."""
        parent = os.path.dirname(path)
        # dirname is '' for a bare file name; os.makedirs('') would raise
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _resolve_flag_button(flags):
        """
        Translate the flags found for a sample into button appearance.

        :param list[str] flags: flag names found in the sample directory
        :return (str, str): button CSS class and the flag label to display
        """
        fallback_class = "btn btn-secondary"
        if not flags:
            return fallback_class, "Missing"
        if len(flags) > 1:
            return fallback_class, "Multiple"
        try:
            appearance = BUTTON_APPEARANCE_BY_FLAG[flags[0]]
        except KeyError:
            return fallback_class, "Unknown"
        return appearance["button_class"], appearance["flag"]

    @staticmethod
    def _warn_invalid_path(kind, row):
        """
        Log that no valid path could be built for a row of the objects table.

        :param str kind: what the path points to, e.g. 'object' or 'image'
        :param pandas.Series row: the offending row
        """
        sample, file_name = row["sample_name"], row["filename"]
        if not (str(sample) and str(file_name)):
            sample, file_name = "Unknown", "Unknown"
        _LOGGER.warning(
            "Sample: {} | Missing valid {} path for: {}".format(
                sample, kind, file_name
            )
        )

    # ------------------------------------------------------------------
    # page builders
    # ------------------------------------------------------------------

    def create_object_parent_html(self, objs, navbar, footer):
        """
        Generates a page listing all the project objects with links
        to individual object pages

        :param pandas.DataFrame objs: project level dataframe containing any reported objects for all samples
        :param str navbar: HTML to be included as the navbar in the main summary page
        :param str footer: HTML to be included as the footer
        :return str: Rendered parent objects HTML file
        """
        self._ensure_parent_dir(os.path.join(self.reports_dir, "objects.html"))
        pages = []
        labels = []
        if not objs.empty:
            for key in objs["key"].drop_duplicates().sort_values():
                pages.append(os.path.relpath(self._page_path(key), self.reports_dir))
                labels.append(key)

        template_vars = dict(
            navbar=navbar, footer=footer, labels=labels, pages=pages, header="Objects"
        )
        return render_jinja_template(
            "navbar_list_parent.html", self.j_env, template_vars
        )

    def create_sample_parent_html(self, navbar, footer):
        """
        Generates a page listing all the project samples with links
        to individual sample pages

        :param str navbar: HTML to be included as the navbar in the main summary page
        :param str footer: HTML to be included as the footer
        :return str: Rendered parent samples HTML file
        """
        self._ensure_parent_dir(os.path.join(self.reports_dir, "samples.html"))
        pages = []
        labels = []
        for sample in self.prj.samples:
            sample_name = str(sample.sample_name)
            sample_dir = os.path.join(self.prj.results_folder, sample_name)
            # Only samples that have a results directory get listed
            if os.path.exists(sample_dir):
                pages.append(
                    os.path.relpath(self._page_path(sample_name), self.reports_dir)
                )
                labels.append(sample_name)

        template_vars = dict(
            navbar=navbar, footer=footer, labels=labels, pages=pages, header="Samples"
        )
        return render_jinja_template(
            "navbar_list_parent.html", self.j_env, template_vars
        )

    def create_navbar(self, navbar_links, index_html_relpath):
        """
        Creates the navbar using the provided links

        :param str navbar_links: HTML list of links to be inserted into a navbar
        :param str index_html_relpath: path to the index page, relative to
            the page the navbar will be embedded in
        :return str: navbar HTML
        """
        template_vars = dict(navbar_links=navbar_links, index_html=index_html_relpath)
        return render_jinja_template("navbar.html", self.j_env, template_vars)

    def create_footer(self):
        """
        Renders the footer from the templates directory

        :return str: footer HTML
        """
        return render_jinja_template("footer.html", self.j_env, dict(version=v))

    def create_navbar_links(
        self, objs, stats, wd=None, context=None, include_status=True
    ):
        """
        Return a string containing the navbar prebuilt html.

        Generates links to each page relative to the directory of interest (wd arg) or uses the provided context to
        create the paths (context arg)

        :param pandas.DataFrame objs: project results dataframe containing
            object data
        :param list[dict] stats: a summary file of pipeline statistics for each
            analyzed sample
        :param path wd: the working directory of the current HTML page being generated, enables navbar links
            relative to page
        :param list[str] context: the context the links will be used in.
            The sequence of directories to be prepended to the HTML file in the resulting navbar
        :param bool include_status: whether the status link should be included in the links set
        :return str: navbar links as HTML-formatted string
        :raises ValueError: if neither 'wd' nor 'context' is provided
        """
        if wd is None and context is None:
            raise ValueError(
                "Either 'wd' (path the links should be relative to) or 'context'"
                " (the context for the links) has to be provided."
            )
        status_relpath = _make_relpath(
            file_name=os.path.join(self.reports_dir, "status.html"),
            wd=wd,
            context=context,
        )
        objects_relpath = _make_relpath(
            file_name=os.path.join(self.reports_dir, "objects.html"),
            wd=wd,
            context=context,
        )
        samples_relpath = _make_relpath(
            file_name=os.path.join(self.reports_dir, "samples.html"),
            wd=wd,
            context=context,
        )
        dropdown_keys_objects = None
        dropdown_relpaths_objects = None
        dropdown_relpaths_samples = None
        sample_names = None
        if objs is not None and not objs.dropna().empty:
            # If the number of objects is 20 or less, use a drop-down menu
            if len(objs["key"].drop_duplicates()) <= 20:
                (
                    dropdown_relpaths_objects,
                    dropdown_keys_objects,
                ) = _get_navbar_dropdown_data_objects(
                    objs=objs, wd=wd, context=context, reports_dir=self.reports_dir
                )
            else:
                # Too many entries for a menu: link to the objects parent page
                dropdown_relpaths_objects = objects_relpath
        if stats:
            if len(stats) <= 20:
                (
                    dropdown_relpaths_samples,
                    sample_names,
                ) = _get_navbar_dropdown_data_samples(
                    stats=stats, wd=wd, context=context, reports_dir=self.reports_dir
                )
            else:
                # Create a menu link to the samples parent page
                dropdown_relpaths_samples = samples_relpath
        status_page_name = "Status" if include_status else None
        template_vars = dict(
            status_html_page=status_relpath,
            status_page_name=status_page_name,
            dropdown_keys_objects=dropdown_keys_objects,
            objects_page_name="Objects",
            samples_page_name="Samples",
            objects_html_page=dropdown_relpaths_objects,
            samples_html_page=dropdown_relpaths_samples,
            menu_name_objects="Objects",
            menu_name_samples="Samples",
            sample_names=sample_names,
            all_samples=samples_relpath,
            all_objects=objects_relpath,
        )
        return render_jinja_template("navbar_links.html", self.j_env, template_vars)

    def create_object_html(self, single_object, navbar, footer):
        """
        Generates a page for an individual object type with all of its
        plots from each sample

        Rows for which no usable path can be built, or whose files are not
        present on disk, are skipped and reported in the log.

        :param pandas.DataFrame single_object: contains reference
            information for an individual object type for all samples
        :param str navbar: HTML to be included as the navbar in the main summary page
        :param str footer: HTML to be included as the footer
        """
        keys = single_object["key"].drop_duplicates().sort_values()
        if keys.empty:
            _LOGGER.warning("create_object_html: no object key, nothing to render")
            return
        # A single key is expected; if several are present the last one in
        # sorted order names the page.
        current_name = str(keys.iloc[-1])
        page_filename = self._page_filename(current_name)
        html_page_path = os.path.join(self.reports_dir, page_filename)
        self._ensure_parent_dir(html_page_path)

        links = []
        figures = []
        warnings = []
        for _, row in single_object.iterrows():
            # Build the path to the object itself. os.path.join raises
            # TypeError on non-string cells (e.g. NaN), so catch that too.
            try:
                object_path = os.path.join(
                    self.prj.results_folder, row["sample_name"], row["filename"]
                )
            except (AttributeError, TypeError):
                self._warn_invalid_path("object", row)
                warnings.append(str(row["filename"]))
                continue
            object_relpath = os.path.relpath(object_path, self.reports_dir)

            # When the anchor is not an image the object is linked directly
            if str(row["anchor_image"]).lower().endswith(IMAGE_EXTS):
                try:
                    image_path = os.path.join(
                        self.prj.results_folder, row["sample_name"], row["anchor_image"]
                    )
                except (AttributeError, TypeError):
                    _LOGGER.warning(str(row))
                    self._warn_invalid_path("image", row)
                    image_path = ""
            else:
                image_path = object_path

            # Both the file and its thumbnail have to exist
            if not (os.path.isfile(image_path) and os.path.isfile(object_path)):
                warnings.append(str(row["filename"]))
                continue
            image_relpath = os.path.relpath(image_path, self.reports_dir)
            _LOGGER.debug("Checking image path: {}".format(image_path))
            if str(image_path).lower().endswith(IMAGE_EXTS):
                figures.append([object_relpath, str(row["sample_name"]), image_relpath])
            else:
                _LOGGER.debug("Got link")
                links.append([str(row["sample_name"]), image_relpath])

        if warnings:
            _LOGGER.warning(
                "create_object_html: "
                + page_filename
                + " references nonexistent object files"
            )
            _LOGGER.debug(
                page_filename
                + " nonexistent files: "
                + ",".join(str(x) for x in warnings)
            )
        template_vars = dict(
            navbar=navbar,
            footer=footer,
            name=current_name,
            figures=figures,
            links=links,
        )
        save_html(
            html_page_path,
            render_jinja_template("object.html", self.j_env, args=template_vars),
        )

    def create_sample_html(self, objs, sample_name, sample_stats, navbar, footer):
        """
        Produce an HTML page containing all of a sample's objects
        and the sample summary statistics

        When the sample has no directory in the results folder the page is
        still produced, with no file links and the flag shown as missing.

        :param pandas.DataFrame objs: project level dataframe containing
            any reported objects for all samples
        :param str sample_name: the name of the current sample
        :param dict sample_stats: pipeline run statistics for the current sample
        :param str navbar: HTML to be included as the navbar in the main summary page
        :param str footer: HTML to be included as the footer
        :return str: path to the produced HTML page
        """
        html_page = self._page_path(sample_name)
        sample_page_relpath = os.path.relpath(html_page, self._outdir)
        single_sample = (
            _pd.DataFrame() if objs.empty else objs[objs["sample_name"] == sample_name]
        )
        self._ensure_parent_dir(html_page)

        # Defaults used when the sample was never run, so that the template
        # variables below are always defined.
        stats_file_path = None
        profile_file_path = None
        commands_file_path = None
        log_file_path = None
        button_class, flag = self._resolve_flag_button([])
        links = []
        figures = []
        warnings = []

        results_folder = self.prj.results_folder
        sample_dir = os.path.join(results_folder, sample_name)
        if not os.path.exists(sample_dir):
            # Sample was not run through the pipeline
            _LOGGER.warning(
                "{} is not present in {}".format(sample_name, results_folder)
            )
        else:
            if single_sample.empty:
                # When there is no objects.tsv file, search for the
                # presence of log, profile, and command files
                log_name = _match_file_for_sample(sample_name, "log.md", results_folder)
                profile_name = _match_file_for_sample(
                    sample_name, "profile.tsv", results_folder
                )
                command_name = _match_file_for_sample(
                    sample_name, "commands.sh", results_folder
                )
            else:
                annotation = str(single_sample.iloc[0]["annotation"])
                log_name = annotation + "_log.md"
                profile_name = annotation + "_profile.tsv"
                command_name = annotation + "_commands.sh"

            def _link(file_name):
                # the file search above yields None when nothing matched
                if file_name is None:
                    return None
                return _get_relpath_to_file(
                    file_name, sample_name, results_folder, self.reports_dir
                )

            stats_file_path = _link("stats.tsv")
            profile_file_path = _link(profile_name)
            commands_file_path = _link(command_name)
            log_file_path = _link(log_name)
            button_class, flag = self._resolve_flag_button(_get_flags(sample_dir))

            # single_sample only holds rows of this sample (see filter above)
            for _, row in single_sample.iterrows():
                try:
                    # Image thumbnails are optional
                    image_path = os.path.join(
                        results_folder, sample_name, row["anchor_image"]
                    )
                    image_relpath = os.path.relpath(image_path, self.reports_dir)
                except (AttributeError, TypeError):
                    image_path = ""
                    image_relpath = ""

                # "page" can be any kind of reported object
                page_path = os.path.join(results_folder, sample_name, row["filename"])
                page_relpath = os.path.relpath(page_path, self.reports_dir)
                if not os.path.isfile(page_path):
                    # If no file present, there is no object by that name
                    warnings.append(str(row["filename"]))
                elif os.path.isfile(image_path) and str(image_path).lower().endswith(
                    (".png", ".jpg", ".jpeg", ".svg", ".gif")
                ):
                    # The object has a valid thumbnail image: add as a figure
                    figures.append([page_relpath, str(row["key"]), image_relpath])
                else:
                    # Otherwise treat as a link
                    links.append([str(row["key"]), page_relpath])
            if warnings:
                _LOGGER.debug(
                    self._page_filename(sample_name)
                    + " nonexistent files: "
                    + ",".join(warnings)
                )

        template_vars = dict(
            navbar=navbar,
            footer=footer,
            sample_name=sample_name,
            stats_file_path=stats_file_path,
            profile_file_path=profile_file_path,
            commands_file_path=commands_file_path,
            log_file_path=log_file_path,
            button_class=button_class,
            sample_stats=sample_stats,
            flag=flag,
            links=links,
            figures=figures,
        )
        save_html(
            html_page, render_jinja_template("sample.html", self.j_env, template_vars)
        )
        return sample_page_relpath

    def create_status_html(self, status_table, navbar, footer):
        """
        Generates a page listing all the samples, their run status, their
        log file, and the total runtime if completed.

        :param status_table: pre-built status table handed to the template
            unchanged
        :param str navbar: HTML to be included as the navbar in the main summary page
        :param str footer: HTML to be included as the footer
        :return str: rendered status HTML file
        """
        _LOGGER.debug("Building status page...")
        template_vars = dict(status_table=status_table, navbar=navbar, footer=footer)
        return render_jinja_template("status.html", self.j_env, template_vars)

    def create_project_objects(self):
        """
        Render available project level outputs defined in the
        pipeline output schemas

        Outputs of type 'image' become figures when their thumbnail is
        found; every other output becomes a link. Outputs (or thumbnails)
        that are not found on disk are reported in the log.

        :return str: rendered project objects HTML
        """
        _LOGGER.debug("Building project objects section...")
        figures = []
        links = []
        warnings = []
        # For each protocol report the project summarizers' results
        self.prj.populate_pipeline_outputs()
        ifaces = self.prj.project_pipeline_interfaces
        # Check the interface files for summarizers
        for iface in ifaces:
            schema_paths = iface.get_pipeline_schemas(OUTPUT_SCHEMA_KEY)
            if schema_paths is None:
                _LOGGER.debug(
                    "No project-level outputs defined in "
                    "schema: {}".format(schema_paths)
                )
                continue
            if isinstance(schema_paths, str):
                schema_paths = [schema_paths]
            for output_schema_path in schema_paths:
                results = get_project_outputs(
                    self.prj, read_schema(output_schema_path)
                )
                for name, result in results.items():
                    title = str(result.setdefault("title", "No caption"))
                    result_type = str(result["type"])
                    result_file = str(result["path"])
                    # Keep a missing thumbnail as None; str(None) would be
                    # the truthy string 'None'.
                    thumbnail = result.setdefault("thumbnail_path", None)
                    result_img = None if thumbnail is None else str(thumbnail)
                    if result_img and not os.path.isabs(result_img):
                        result_img = os.path.join(self._outdir, result_img)
                    if not os.path.isabs(result_file):
                        result_file = os.path.join(self._outdir, result_file)
                    _LOGGER.debug("Looking for project file: {}".format(result_file))
                    # Confirm the file itself was produced
                    file_matches = glob.glob(result_file)
                    if not file_matches:
                        warnings.append("{} ({})".format(title, result_file))
                        continue
                    file_relpath = os.path.relpath(str(file_matches[0]), self._outdir)
                    if result_type != "image":
                        # TODO: add more fine-grained type support?
                        #  not just image and link
                        links.append([title, file_relpath])
                        continue
                    # Add as a figure, find thumbnail
                    img_matches = glob.glob(result_img) if result_img else []
                    if img_matches:
                        img_relpath = os.path.relpath(
                            str(img_matches[0]), self._outdir
                        )
                        figures.append([file_relpath, title, img_relpath])
                    else:
                        warnings.append(
                            "{} (thumbnail: {})".format(title, result_img)
                        )
        if warnings:
            _LOGGER.warning("Not found: {}".format([str(x) for x in warnings]))
        _LOGGER.debug("collected project-level figures: {}".format(figures))
        _LOGGER.debug("collected project-level links: {}".format(links))
        template_vars = dict(figures=figures, links=links)
        return render_jinja_template("project_object.html", self.j_env, template_vars)

    def create_index_html(
        self, objs, stats, col_names, navbar, footer, navbar_reports=None
    ):
        """
        Generate an index.html style project home page w/ sample summary
        statistics

        Also produces the sample, object, parent-listing and status pages
        in the reports directory.

        :param pandas.DataFrame objs: project level dataframe containing
            any reported objects for all samples
        :param list[dict] stats: a summary file of pipeline statistics for each
            analyzed sample
        :param list col_names: all unique column names used in the stats file
        :param str navbar: HTML to be included as the navbar in the main summary page
        :param str footer: HTML to be included as the footer
        :param str navbar_reports: HTML to be included as the navbar for pages in the reports directory
        :return str: path to the produced index HTML page
        """
        _LOGGER.debug("Building index page...")
        # the sample_name column is rendered separately, as a link
        cols = [c for c in col_names if c != "sample_name"]
        if navbar_reports is None:
            navbar_reports = navbar
        if not objs.dropna().empty:
            # work on a de-duplicated copy, the caller's frame stays untouched
            objs = objs.drop_duplicates(keep="last")
        # Generate parent index.html page path
        index_html_path = get_file_for_project_old(self.prj, "summary.html")

        # Add stats_summary.tsv button link
        stats_file_name = os.path.join(self._outdir, self.prj.name)
        if hasattr(self.prj, "subproject") and self.prj.subproject:
            stats_file_name += "_" + self.prj.subproject
        stats_file_name += "_stats_summary.tsv"
        stats_file_path = os.path.relpath(stats_file_name, self._outdir)
        stats_available = os.path.isfile(stats_file_name)

        # Add stats summary table to index page and produce individual
        # sample pages
        table_row_data = []
        if stats_available:
            samples_cols_missing = []
            _LOGGER.debug(" * Creating sample pages...")
            for row in stats:
                sample_name = row["sample_name"]
                sample_page = self.create_sample_html(
                    objs, sample_name, row, navbar_reports, footer
                )
                # treat sample_name column differently - provide a link to the sample page
                table_cell_data = [[sample_page, sample_name]]
                # for each column read the data from the stats
                for c in cols:
                    try:
                        table_cell_data.append(str(row[c]))
                    except KeyError:
                        table_cell_data.append("NA")
                        samples_cols_missing.append(sample_name)
                table_row_data.append(table_cell_data)
            _LOGGER.debug(
                "Samples with missing columns: {}".format(set(samples_cols_missing))
            )
        else:
            _LOGGER.warning("No stats file '%s'", stats_file_name)

        # Create parent samples page with links to each sample
        save_html(
            os.path.join(self.reports_dir, "samples.html"),
            self.create_sample_parent_html(navbar_reports, footer),
        )
        _LOGGER.debug(" * Creating object pages...")
        # Create objects pages
        if not objs.dropna().empty:
            for key in objs["key"].drop_duplicates().sort_values():
                single_object = objs[objs["key"] == key]
                self.create_object_html(single_object, navbar_reports, footer)

        # Create parent objects page with links to each object type
        save_html(
            os.path.join(self.reports_dir, "objects.html"),
            self.create_object_parent_html(objs, navbar_reports, footer),
        )
        # Create status page with each sample's status listed
        save_html(
            os.path.join(self.reports_dir, "status.html"),
            self.create_status_html(
                create_status_table(self.prj), navbar_reports, footer
            ),
        )
        # Add project level objects
        project_objects = self.create_project_objects()
        # NOTE(review): without a stats file the template receives
        # stats_json=None -- confirm index.html copes with that value.
        stats_json = _read_tsv_to_json(stats_file_name) if stats_available else None
        # Complete and close HTML file
        template_vars = dict(
            project_name=self.prj.name,
            stats_json=stats_json,
            navbar=navbar,
            footer=footer,
            stats_file_path=stats_file_path,
            project_objects=project_objects,
            columns=col_names,
            table_row_data=table_row_data,
        )
        save_html(
            index_html_path,
            render_jinja_template("index.html", self.j_env, template_vars),
        )
        return index_html_path
673
-
674
-
675
def render_jinja_template(name, jinja_env, args=None):
    """
    Render template in the specified jinja environment using the provided args

    :param str name: name of the template
    :param jinja2.Environment jinja_env: the initialized environment to use in this the looper HTML reports context
    :param dict args: arguments to pass to the template; None (the default)
        renders the template without any arguments
    :return str: rendered template
    :raises AssertionError: if args is neither None nor a dict
    """
    if args is None:
        # avoid a shared mutable default argument
        args = {}
    if not isinstance(args, dict):
        # raised explicitly so the check is not stripped under ``python -O``;
        # the exception type is the one the former assert statement produced
        raise AssertionError("args has to be a dict")
    template = jinja_env.get_template(name)
    return template.render(**args)
687
-
688
-
689
def save_html(path, template):
    """
    Save rendered template as an HTML file

    Missing parent directories are created. A failure to write the file is
    logged rather than raised.

    :param str path: the desired location for the file to be produced
    :param str template: the template or just string
    """
    parent_dir = os.path.dirname(path)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    # exist_ok avoids the race between an existence check and the creation.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    try:
        # explicit encoding so the output does not depend on the locale
        with open(path, "w", encoding="utf-8") as f:
            f.write(template)
    except IOError:
        _LOGGER.error("Could not write the HTML file: {}".format(path))
703
-
704
-
705
def get_jinja_env(templates_dirname=None):
    """
    Build the jinja environment that loads templates from a directory

    :param str templates_dirname: path to the templates directory; when not
        provided, the '<TEMPLATES_DIRNAME>_old' directory located next to
        this module is used
    :return jinja2.Environment: jinja environment
    """
    if templates_dirname is None:
        # fall back to the templates shipped alongside this file
        module_dir = os.path.dirname(os.path.realpath(__file__))
        templates_dirname = os.path.join(module_dir, f"{TEMPLATES_DIRNAME}_old")
    _LOGGER.debug("Using templates dir: " + templates_dirname)
    template_loader = jinja2.FileSystemLoader(templates_dirname)
    return jinja2.Environment(loader=template_loader)
717
-
718
-
719
def _get_flags(sample_dir):
    """
    Get the flag(s) present in the directory

    A flag is the lower-case word between the last underscore and the
    '.flag' extension of a file name. Files ending in '.flag' that do not
    follow this naming pattern are ignored.

    :param str sample_dir: path to the directory to be searched for flags
    :return list: flags found in the dir, ordered by file name
    :raises AssertionError: if the directory does not exist
    """
    if not os.path.exists(sample_dir):
        # raised explicitly so the check is not stripped under ``python -O``
        raise AssertionError(
            "The provided path ('{}') does not exist".format(sample_dir)
        )
    # glob returns matches in arbitrary order; sort for a stable result
    flag_files = sorted(glob.glob(os.path.join(sample_dir, "*.flag")))
    if len(flag_files) > 1:
        _LOGGER.warning(
            "Multiple flag files ({files_count}) found in sample dir '{sample_dir}'".format(
                files_count=len(flag_files), sample_dir=sample_dir
            )
        )
    if len(flag_files) == 0:
        _LOGGER.warning(
            "No flag files found in sample dir '{sample_dir}'".format(
                sample_dir=sample_dir
            )
        )
    flags = []
    for flag_file in flag_files:
        found = re.search(r"\_([a-z]+)\.flag$", os.path.basename(flag_file))
        # re.search yields None for names that do not match the pattern
        if found is not None:
            flags.append(found.group(1))
    return flags
746
-
747
-
748
def _match_file_for_sample(sample_name, appendix, location, full_path=False):
    """
    Look up a file of the sample whose name ends with the given appendix

    The search is a glob for '*<appendix>' inside '<location>/<sample_name>'.

    :param str sample_name: name of the sample that the file name should be found for
    :param str appendix: the ending specific for the file
    :param str location: where to look for the file
    :param bool full_path: whether to return full path
    :return str: the name (or full path) of the first matched file,
        None when nothing matched
    """
    search_pattern = os.path.join(location, sample_name, "*" + appendix)
    candidates = glob.glob(search_pattern)
    if not candidates:
        return None
    if len(candidates) > 1:
        _LOGGER.warning(
            "matched mutiple files for '{}'. Returning the first one".format(
                search_pattern
            )
        )
    chosen = candidates[0]
    if full_path:
        return chosen
    return os.path.basename(chosen)
770
-
771
-
772
def _get_relpath_to_file(file_name, sample_name, location, relative_to):
    """
    Safely gets the relative path for the file for the specified sample

    :param str file_name: name of the file; None is accepted and yields None
    :param str sample_name: name of the sample that the file path should be found for
    :param str location: where to look for the file
    :param str relative_to: path the result path should be relative to
    :return str: a path to the file, None if the file name is None or the
        file does not exist
    """
    # Check for None before joining: os.path.join raises TypeError on None
    if file_name is None:
        return None
    abs_file_path = os.path.join(location, sample_name, file_name)
    if not os.path.exists(abs_file_path):
        return None
    return os.path.relpath(abs_file_path, relative_to)
787
-
788
-
789
def _make_relpath(file_name, wd, context=None):
    """
    Express a path relative to a directory, optionally prefixed by a context

    The context lets the navbar links be used outside of the native looper
    summary pages.

    :param str file_name: the path to make relative
    :param str wd: the dir the path should be relative to
    :param list[str] context: the context the links will be used in.
        The sequence of directories to be prepended to the HTML file in the resulting navbar
    :return str: relative path
    """
    relative = os.path.relpath(file_name, wd)
    if context:
        # prepend the context directories in the given order
        return os.path.join(*context, relative)
    return relative
802
-
803
-
804
def _get_navbar_dropdown_data_objects(objs, wd, context, reports_dir):
    """
    Collect the navbar drop-down entries for the object pages

    :param pandas.DataFrame objs: dataframe with a 'key' column naming the objects
    :param str wd: the dir the links should be relative to
    :param list[str] context: directories to prepend to each link
    :param str reports_dir: directory holding the object pages
    :return (list[str], pandas.Series): links to the object pages and the
        sorted unique keys they belong to; (None, None) if objs is None
    """
    if objs is None:
        return None, None
    df_keys = objs["key"].drop_duplicates().sort_values()
    relpaths = [
        _make_relpath(
            os.path.join(reports_dir, (key + ".html").replace(" ", "_").lower()),
            wd,
            context,
        )
        for key in df_keys
    ]
    return relpaths, df_keys
813
-
814
-
815
def _get_navbar_dropdown_data_samples(stats, wd, context, reports_dir):
    """
    Collect the navbar drop-down entries for the sample pages

    Entries of stats without a 'sample_name' key are skipped with a warning.

    :param list[dict] stats: pipeline statistics, one mapping per sample
    :param str wd: the dir the links should be relative to
    :param list[str] context: directories to prepend to each link
    :param str reports_dir: directory holding the sample pages
    :return (list[str], list[str]): links to the sample pages and the
        matching sample names; (None, None) if stats is None
    """
    if stats is None:
        return None, None
    relpaths = []
    sample_names = []
    for sample in stats:
        if "sample_name" not in sample:
            _LOGGER.warning("Could not determine sample name in stats.tsv")
            continue
        sample_name = str(sample["sample_name"])
        page_file = (sample_name + ".html").replace(" ", "_").lower()
        relpaths.append(
            _make_relpath(os.path.join(reports_dir, page_file), wd, context)
        )
        sample_names.append(sample_name)
    return relpaths, sample_names
833
-
834
-
835
def _read_csv_encodings(path, encodings=None, **kwargs):
    """
    Try to read file with the provided encodings

    The encodings are tried in order and the first one that decodes the
    file wins.

    :param str path: path to file
    :param list encodings: list of encodings to try; defaults to
        ["utf-8", "ascii"]
    :param kwargs: passed on to pandas.read_csv
    :return pandas.DataFrame: the parsed file, None if it could not be
        decoded with any of the encodings
    """
    if encodings is None:
        # built per call instead of sharing a mutable default argument
        encodings = ["utf-8", "ascii"]
    for encoding in encodings:
        try:
            return _pd.read_csv(path, encoding=encoding, **kwargs)
        except UnicodeDecodeError:
            # wrong guess, move on to the next candidate
            continue
    _LOGGER.warning(
        "Could not read the log file '{p}' with encodings '{enc}'".format(
            p=path, enc=encodings
        )
    )
    return None
856
-
857
-
858
def _get_from_log(log_path, regex):
    """
    Get the value for the matched key from log file

    The log is read one line per row. The first line matching the regex is
    split on its first colon and the stripped remainder is returned, so both
    memory values (e.g. 1.1GB) and time values (e.g. 1:10:10) are supported.

    :param str log_path: path to the log file
    :param str regex: matching str. Should be formatted as follows: r'(phrase to match)'
    :return str | None: matched and stripped string; None if the log could
        not be read, nothing matched, or the matched line contains no colon
    :raises IOError: when the file is not found in the provided path
    """
    if not os.path.exists(log_path):
        raise IOError("Can't read the log file '{}'. Not found".format(log_path))
    log = _read_csv_encodings(log_path, header=None, names=["data"])
    if log is None:
        _LOGGER.warning("'{r}' was not read from log".format(r=regex))
        return None
    # match regex, get row(s) that matched the regex
    matches = log.iloc[:, 0].str.extractall(regex)
    # no matches? return None
    if matches.empty:
        return None
    if matches.size > 1:
        _LOGGER.warning(
            "When parsing '{lp}', more than one values matched with: {r}. Returning first.".format(
                lp=log_path, r=regex
            )
        )
    # extractall indexes its result by (row of the log, match number);
    # the first level of the first entry points at the first matching line
    first_row = matches.index[0][0]
    line = log.iloc[first_row, 0]
    # Plain str.partition avoids Series.str.split(":", 1, ...), whose
    # positional 'n' argument is rejected by recent pandas releases.
    _, colon, value = line.partition(":")
    if not colon:
        _LOGGER.warning(
            "Line matched with {r} in '{lp}' has no ':' separated value".format(
                r=regex, lp=log_path
            )
        )
        return None
    return value.strip()
888
-
889
-
890
def _read_tsv_to_json(path):
    """
    Read a tsv file to a JSON formatted string

    The file is read without a header row, so every line is treated as data.

    :param str path: path to the file
    :return str: JSON formatted string
    :raises AssertionError: when the file does not exist
    """
    # Raised explicitly rather than with an assert statement, so the check
    # is not stripped when Python runs with -O; the exception type and
    # message are the same as before.
    if not os.path.exists(path):
        raise AssertionError("The file '{}' does not exist".format(path))
    _LOGGER.debug("Reading TSV from '{}'".format(path))
    df = _pd.read_csv(path, sep="\t", index_col=False, header=None)
    return df.to_json()
901
-
902
-
903
def uniqify(seq):
    """Drop repeated items, keeping the first occurrence of each in order."""
    # http://stackoverflow.com/questions/480214/
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
909
-
910
-
911
def _resolve_status_flag(found_flags):
    """
    Map the flags found for a sample to a table row class and a flag label.

    :param list found_flags: flags collected for a single sample
    :return (str, str): row class and flag label to display
    """
    if not found_flags:
        return "table-secondary", "Missing"
    if len(found_flags) > 1:
        return "table-secondary", "Multiple"
    only_flag = found_flags[0]
    try:
        appearance = TABLE_APPEARANCE_BY_FLAG[only_flag]
    except KeyError:
        return "table-secondary", "Unknown"
    return appearance["button_class"], appearance["flag"]


def create_status_table(prj, final=True):
    """
    Creates status table, the core of the status page.
    It is abstracted into a function so that it can be used in other software
    packages. It can produce a table of two types. With links to the
    samples/log files and without. The one without can be used to render HTMLs
    for on-the-fly job status inspection.

    Samples without a directory in the project results folder are left out
    of the table and reported in a single warning.

    :param looper.Project prj: project to create the status table for
    :param bool final: if the status table is created for a finalized looper
        run. In such a case, links to samples and log files will be provided
    :return str: rendered status HTML file
    """
    # the reports location does not change between samples, so get it once
    reports_dir = get_file_for_project_old(prj, "reports")
    sample_warning = []
    log_paths = []
    log_link_names = []
    sample_paths = []
    sample_link_names = []
    flags = []
    row_classes = []
    times = []
    mems = []
    for sample in prj.samples:
        sample_name = str(sample.sample_name)
        sample_dir = os.path.join(prj.results_folder, sample_name)

        # Confirm sample directory exists, then build the table row
        if not os.path.exists(sample_dir):
            # Sample was not run through the pipeline
            sample_warning.append(sample_name)
            continue

        # Grab the status flag for the current sample
        button_class, flag = _resolve_status_flag(_get_flags(sample_dir))
        row_classes.append(button_class)
        # get first column data (sample name/link)
        page_name = sample_name + ".html"
        page_path = os.path.join(reports_dir, page_name.replace(" ", "_").lower())
        sample_paths.append(os.path.relpath(page_path, reports_dir))
        sample_link_names.append(sample_name)
        # get second column data (status/flag)
        flags.append(flag)
        # get third column data (log file/link)
        log_name = _match_file_for_sample(sample_name, "log.md", prj.results_folder)
        log_file_link = _get_relpath_to_file(
            log_name,
            sample_name,
            prj.results_folder,
            reports_dir,
        )
        log_link_names.append(log_name)
        log_paths.append(log_file_link)
        # get fourth column data (runtime) and fifth column data (memory)
        profile_file_path = _match_file_for_sample(
            sample.sample_name, "profile.tsv", prj.results_folder, full_path=True
        )
        if os.path.exists(profile_file_path):
            df = _pd.read_csv(
                profile_file_path, sep="\t", comment="#", names=PROFILE_COLNAMES
            )
            df["runtime"] = _pd.to_timedelta(df["runtime"])
            times.append(_get_runtime(df))
            mems.append(_get_maxmem(df))
        else:
            _LOGGER.warning("'{}' does not exist".format(profile_file_path))
            times.append(NO_DATA_PLACEHOLDER)
            mems.append(NO_DATA_PLACEHOLDER)

    # Alert the user to any samples that were skipped
    if sample_warning:
        _LOGGER.warning(
            "{} samples not present in {}: {}".format(
                len(sample_warning),
                prj.results_folder,
                str(sample_warning),
            )
        )
    template_vars = dict(
        sample_link_names=sample_link_names,
        row_classes=row_classes,
        flags=flags,
        times=times,
        mems=mems,
    )
    template_name = "status_table_no_links.html"
    if final:
        # only the finalized table links out to sample pages and log files
        template_name = "status_table.html"
        template_vars.update(
            dict(
                sample_paths=sample_paths,
                log_link_names=log_link_names,
                log_paths=log_paths,
            )
        )
    return render_jinja_template(template_name, get_jinja_env(), template_vars)
1033
-
1034
-
1035
def _get_maxmem(profile_df):
    """
    Report the peak memory recorded in a sample profile

    :param pandas.core.frame.DataFrame profile_df: a data frame representing the current profile.tsv for a sample
    :return str: largest value of the "mem" column followed by " GB",
        or "0 GB" when the column has no entries
    """
    mem_column = profile_df["mem"]
    peak = 0 if mem_column.empty else max(mem_column)
    return "{} GB".format(str(peak))
1045
-
1046
-
1047
def _get_runtime(profile_df):
    """
    Sum the runtimes of a sample profile, counting each "cid" only once

    When a "cid" occurs more than once, only its last row contributes.

    :param pandas.core.frame.DataFrame profile_df: a data frame representing the current profile.tsv for a sample
    :return str: sum of runtimes, with any fractional seconds cut off
    """
    superseded = profile_df.duplicated("cid", keep="last").values
    latest_runs = profile_df[~superseded]
    total_seconds = sum(runtime.total_seconds() for runtime in latest_runs["runtime"])
    whole_seconds, _, _ = str(timedelta(seconds=total_seconds)).partition(".")
    return whole_seconds