nnpdf 4.1.0__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. n3fit/backends/keras_backend/MetaModel.py +27 -26
  2. n3fit/backends/keras_backend/callbacks.py +16 -8
  3. n3fit/backends/keras_backend/internal_state.py +13 -2
  4. n3fit/backends/keras_backend/operations.py +26 -26
  5. n3fit/hyper_optimization/hyper_scan.py +3 -9
  6. n3fit/hyper_optimization/penalties.py +11 -8
  7. n3fit/hyper_optimization/rewards.py +65 -34
  8. n3fit/model_gen.py +344 -270
  9. n3fit/model_trainer.py +71 -105
  10. n3fit/performfit.py +2 -7
  11. n3fit/tests/regressions/quickcard_1.json +12 -28
  12. n3fit/tests/regressions/quickcard_3.json +12 -28
  13. n3fit/tests/regressions/quickcard_pol_1.json +10 -26
  14. n3fit/tests/regressions/quickcard_pol_3.json +9 -25
  15. n3fit/tests/regressions/quickcard_qed_1.json +11 -27
  16. n3fit/tests/regressions/quickcard_qed_3.json +11 -27
  17. n3fit/tests/test_hyperopt.py +6 -12
  18. n3fit/tests/test_layers.py +6 -6
  19. n3fit/tests/test_modelgen.py +73 -24
  20. n3fit/tests/test_multireplica.py +52 -16
  21. n3fit/tests/test_penalties.py +7 -8
  22. n3fit/tests/test_preprocessing.py +2 -2
  23. n3fit/tests/test_vpinterface.py +5 -10
  24. n3fit/vpinterface.py +88 -44
  25. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/METADATA +9 -3
  26. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/RECORD +105 -67
  27. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/WHEEL +1 -1
  28. nnpdf_data/_version.py +1 -1
  29. nnpdf_data/commondata/ATLAS_2JET_7TEV_R06/metadata.yaml +16 -5
  30. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/data.yaml +2 -0
  31. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/kinematics.yaml +13 -0
  32. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/metadata.yaml +51 -0
  33. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/uncertainties.yaml +17 -0
  34. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/data.yaml +2 -0
  35. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/kinematics.yaml +13 -0
  36. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/metadata.yaml +52 -0
  37. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/uncertainties.yaml +22 -0
  38. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/data.yaml +3 -0
  39. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/kinematics.yaml +17 -0
  40. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/metadata.yaml +57 -0
  41. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/uncertainties.yaml +8 -0
  42. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/data.yaml +2 -0
  43. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/kinematics.yaml +9 -0
  44. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/metadata.yaml +54 -0
  45. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/uncertainties.yaml +7 -0
  46. nnpdf_data/commondata/CMS_1JET_8TEV/metadata.yaml +7 -1
  47. nnpdf_data/commondata/CMS_2JET_7TEV/metadata.yaml +16 -19
  48. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/data.yaml +2 -0
  49. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/kinematics.yaml +13 -0
  50. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/metadata.yaml +51 -0
  51. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/uncertainties.yaml +12 -0
  52. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_d2Sig_dmttBar_dyttBar.yaml +17 -0
  53. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dmttBar.yaml +8 -0
  54. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dpTt.yaml +8 -0
  55. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dyt.yaml +11 -0
  56. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/filter.py +260 -0
  57. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_d2Sig_dmttBar_dyttBar.yaml +193 -0
  58. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dmttBar.yaml +57 -0
  59. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dpTt.yaml +57 -0
  60. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dyt.yaml +81 -0
  61. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/metadata.yaml +114 -0
  62. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/mtt_abs_parton.yaml +828 -0
  63. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/mttytt-abs_parton.yaml +1899 -0
  64. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/ptt_abs_parton.yaml +828 -0
  65. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/submission.yaml +47 -0
  66. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/yt_abs_parton.yaml +1179 -0
  67. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_d2Sig_dmttBar_dyttBar.yaml +2282 -0
  68. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dmttBar.yaml +1256 -0
  69. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dpTt.yaml +1256 -0
  70. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dyt.yaml +1598 -0
  71. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/data.yaml +2 -0
  72. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/kinematics.yaml +13 -0
  73. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/metadata.yaml +51 -0
  74. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/uncertainties.yaml +17 -0
  75. nnpdf_data/commondata/CMS_TTBAR_5TEV_TOT/metadata.yaml +1 -1
  76. nnpdf_data/commondata/NNPDF_POS_2P24GEV/metadata.yaml +60 -0
  77. nnpdf_data/commondata/dataset_names.yml +6 -1
  78. nnpdf_data/theory_cards/41000010.yaml +42 -0
  79. nnpdf_data/theory_cards/41000011.yaml +43 -0
  80. nnpdf_data/theory_cards/41000012.yaml +43 -0
  81. nnpdf_data/theory_cards/41000013.yaml +42 -0
  82. nnpdf_data/theory_cards/41000014.yaml +43 -0
  83. nnpdf_data/theory_cards/41000015.yaml +43 -0
  84. validphys/_version.py +1 -1
  85. validphys/config.py +30 -10
  86. validphys/convolution.py +37 -14
  87. validphys/coredata.py +15 -5
  88. validphys/covmats.py +9 -2
  89. validphys/dataplots.py +1 -1
  90. validphys/filters.py +17 -3
  91. validphys/fkparser.py +11 -1
  92. validphys/gridvalues.py +1 -0
  93. validphys/hessian2mc.py +5 -5
  94. validphys/lhaindex.py +5 -0
  95. validphys/loader.py +1 -1
  96. validphys/n3fit_data.py +107 -61
  97. validphys/nnprofile_default.yaml +2 -1
  98. validphys/pineparser.py +12 -2
  99. validphys/scripts/postfit.py +4 -4
  100. validphys/scripts/vp_pdfrename.py +8 -9
  101. validphys/tests/conftest.py +6 -2
  102. validphys/tests/test_hessian2mc.py +7 -5
  103. validphys/utils.py +1 -0
  104. n3fit/tests/regressions/quickcard_pol/filter.yml +0 -80
  105. n3fit/tests/regressions/quickcard_pol/nnfit/input/lockfile.yaml +0 -111
  106. n3fit/tests/regressions/quickcard_pol/nnfit/replica_1/quickcard_pol.exportgrid +0 -572
  107. n3fit/tests/regressions/quickcard_pol/nnfit/replica_1/quickcard_pol.json +0 -71
  108. n3fit/tests/regressions/quickcard_pol/nnfit/replica_3/quickcard_pol.exportgrid +0 -615
  109. n3fit/tests/regressions/quickcard_pol/nnfit/replica_3/quickcard_pol.json +0 -71
  110. n3fit/tests/regressions/weights.weights.h5 +0 -0
  111. n3fit/tests/regressions/weights_pol.weights.h5 +0 -0
  112. n3fit/tests/test +0 -1
  113. nnpdf_data/theory_cards/40000099.yaml +0 -41
  114. nnpdf_data/theory_cards/40000099.yml +0 -41
  115. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/entry_points.txt +0 -0
  116. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info/licenses}/LICENSE +0 -0
n3fit/model_gen.py CHANGED
@@ -1,16 +1,23 @@
  """
- Library of functions which generate the NN objects
-
- Contains:
- # observable_generator:
- Generates the output layers as functions
- # pdfNN_layer_generator:
- Generates the PDF NN layer to be fitted
-
-
+ Library of functions which generate the models used by n3fit to determine PDFs.
+
+ It contains functions to generate:
+
+ 1) Observables
+ The main function is ``observable_generator``, which takes the input theory
+ and generates the path from the PDF result to the computation of the
+ training and validation losses / chi2
+
+ 2) PDFs
+ The main function is ``generate_pdf_model``, which takes a list of settings
+ defining the replica-dependent architecture of each of the models that form
+ the ensemble, as well as ensemble-wide options such as the flavour basis,
+ sum rule definition or theoretical settings, and generates a PDF model
+ which takes an array of (x) as input and outputs the value of the PDF
+ for each replica, for each x, for each flavour.
  """
 
- from dataclasses import dataclass
+ from dataclasses import asdict, dataclass, field
  from typing import Callable
 
  import numpy as np
@@ -323,53 +330,162 @@ def observable_generator(
  return layer_info
 
 
+ @dataclass
+ class ReplicaSettings:
+ """Dataclass which holds all necessary replica-dependent information of a PDF.
+
+ Parameters
+ ----------
+ seed: int
+ seed for the initialization of the neural network
+ nodes: list[int]
+ nodes of each of the layers, starting at the first hidden layer
+ activations: list[str]
+ list of activation functions, should be of equal length as nodes
+ architecture: str
+ select the architecture of the neural network used for the replica,
+ e.g. ``dense`` or ``dense_per_flavour``
+ initializer: str
+ initializer to be used for this replica
+ dropout_rate: float
+ rate of dropout for each layer
+ regularizer: str
+ name of the regularizer to use for this replica (if any)
+ regularizer_args: dict
+ options to pass down to the regularizer (if any)
+ """
+
+ seed: int
+ nodes: list[int]
+ activations: list[str]
+ architecture: str = "dense"
+ initializer: str = "glorot_normal"
+ dropout_rate: float = 0.0
+ regularizer: str = None
+ regularizer_args: dict = field(default_factory=dict)
+
+ def __post_init__(self):
+ """Apply checks to the input, and expand hyperopt callables"""
+ # Expansions
+ if callable(self.activations):
+ # Hyperopt might pass down a function to generate the list of activations
+ # depending on the number of layers
+ self.activations = self.activations(len(self.nodes))
+
+ if self.regularizer_args is None:
+ self.regularizer_args = dict()
+
+ # Checks
+ if len(self.nodes) != len(self.activations):
+ raise ValueError(
+ f"nodes and activations do not match ({self.nodes} vs {self.activations}"
+ )
+ if self.regularizer_args and self.regularizer is None:
+ raise ValueError(
+ "Regularizer arguments have been provided but no regularizer is selected"
+ )
+
+
  def generate_pdf_model(
- nodes: list[int] = None,
- activations: list[str] = None,
- initializer_name: str = "glorot_normal",
- layer_type: str = "dense",
+ replicas_settings: list[ReplicaSettings],
  flav_info: dict = None,
  fitbasis: str = "NN31IC",
  out: int = 14,
- seed: int = None,
- dropout: float = 0.0,
- regularizer: str = None,
- regularizer_args: dict = None,
  impose_sumrule: str = None,
  scaler: Callable = None,
- num_replicas: int = 1,
  photons: Photon = None,
  ):
  """
- Wrapper around pdfNN_layer_generator to allow the generation of single replica models.
+ Generation of the full PDF model which will be used to determine the full PDF.
+ The full PDF model can have any number of replicas, which can be trained in parallel;
+ the limitations of the determination mean that there are certain traits that all replicas
+ must share, while others are free per replica.
+
+ In its most general form, the output of this function is a :py:class:`n3fit.backend.MetaModel`
+ with the following architecture:
+
+ <input layer>
+ in the standard PDF fit this includes only the (x) grid of the NN
+
+ [ list of a separate architecture per replica ]
+ which can be, but need not be, equal for all replicas
+
+ [ <preprocessing factors> ]
+ postprocessing of the network output by a factor x^{alpha}*(1-x)^{beta}
+
+ <normalization>
+ physical sum rules, requires an integral over the PDF
+
+ <rotation to FK-basis>
+ regardless of the physical basis in which the PDF and preprocessing factors are applied
+ the output is rotated to the 14-flavour general basis used in FkTables following
+ PineAPPL's convention
+
+ [<output layer>]
+ 14 flavours per value of x per replica
+ note that, depending on the fit basis (and fitting scale)
+ the output of the PDF will contain repeated values
+
+
+ This function defines how the PDFs will be generated.
+ In the case of identical PDF models (``identical_models = True``, default) the same
+ settings will be used for all replicas.
+ Otherwise, the sampling routines will be used.
+
 
  Parameters:
  -----------
- see model_gen.pdfNN_layer_generator
+ replicas_settings: list[ReplicaSettings]
+ list of ReplicaSettings objects which must contain the following information
+ nodes: list(int)
+ list of the number of nodes per layer of the PDF NN
+ activations: list
+ list of activation functions to apply to each layer
+ initializer: str
+ selects the initializer of the weights of the NN. Default: glorot_normal
+ architecture: str
+ selects the type of architecture of the NN. Default: dense
+ seed: int
+ the initialization seed for the NN
+ dropout_rate: float
+ rate of dropout layer by layer
+ regularizer: str
+ name of the regularizer to use for the NN
+ regularizer_args: dict
+ options to pass down to the regularizer (if any)
+ flav_info: dict
+ dictionary containing the information about each PDF (basis dictionary in the runcard)
+ to be used by Preprocessing
+ fitbasis: str
+ fitbasis used during the fit. Default: NN31IC
+ out: int
+ number of output flavours of the model (default 14)
+ impose_sumrule: str
+ whether to impose sumrules on the output pdf and which one to impose (All, MSR, VSR, TSR)
+ scaler: callable
+ Function to apply to the input. If given the input to the model
+ will be a (1, None, 2) tensor where dim [:,:,0] is scaled
+ When None, instead turn the x point into a (x, log(x)) pair
+ photons: :py:class:`validphys.photon.compute.Photon`
+ If given, gives the AddPhoton layer a function to compute a photon which will be added at the
+ index 0 of the 14-size FK basis
+ This same function will also be used to compute the MSR component for the photon
 
  Returns
  -------
  pdf_model: MetaModel
- pdf model, with `single_replica_generator` attached in a list as an attribute
+ pdf model, with `single_replica_generator` attached as an attribute
  """
- joint_args = {
- "nodes": nodes,
- "activations": activations,
- "initializer_name": initializer_name,
- "layer_type": layer_type,
+ shared_config = {
  "flav_info": flav_info,
  "fitbasis": fitbasis,
- "out": out,
- "dropout": dropout,
- "regularizer": regularizer,
- "regularizer_args": regularizer_args,
+ "output_size": out,
  "impose_sumrule": impose_sumrule,
  "scaler": scaler,
+ "photons": photons,
  }
 
- pdf_model = pdfNN_layer_generator(
- **joint_args, seed=seed, num_replicas=num_replicas, photons=photons
- )
+ pdf_model = _pdfNN_layer_generator(replicas_settings, **shared_config)
 
  # Note that the photons are passed unchanged to the single replica generator
  # computing the photon requires running fiatlux which takes 30' per replica
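
For reference, a minimal sketch of how the new ``ReplicaSettings`` container introduced in the hunk above is meant to be filled in; the concrete field values here are illustrative, not taken from any runcard:

    from n3fit.model_gen import ReplicaSettings

    # One settings object per replica; __post_init__ validates the input
    rp = ReplicaSettings(
        seed=4,
        nodes=[25, 20, 8],
        activations=["tanh", "tanh", "linear"],
        architecture="dense",
        initializer="glorot_normal",
    )

    # Mismatched nodes/activations lists are rejected at construction time:
    # ReplicaSettings(seed=4, nodes=[25, 8], activations=["tanh"])  # raises ValueError
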
@@ -377,33 +493,36 @@ def generate_pdf_model(
  # In order to enable it `single_replica_generator` must take the index of the replica
  # to select the appropiate photon as all of them will be computed and fixed before the fit
 
- # this is necessary to be able to convert back to single replica models after training
- single_replica_generator = lambda: pdfNN_layer_generator(
- **joint_args, seed=0, num_replicas=1, photons=photons, replica_axis=False
- )
+ def single_replica_generator(replica_idx=0):
+ """Generate one single replica from the entire batch.
+ The selected index is relative to the batch, not the entire PDF determination.
+
+ This function is necessary to separate all the different models after training.
+ """
+ settings = replicas_settings[replica_idx]
+ # TODO:
+ # In principle we want to recover the initial replica exactly,
+ # however, for the regression tests to pass
+ # _in the polarized case and only in the polarized case_ this line is necessary
+ # it most likely has to do with numerical precision, but panicking might be in order
+ settings.seed = 0
+ return _pdfNN_layer_generator([settings], **shared_config, replica_axis=False)
+
  pdf_model.single_replica_generator = single_replica_generator
 
  return pdf_model
 
 
- def pdfNN_layer_generator(
- nodes: list[int] = None,
- activations: list[str] = None,
- initializer_name: str = "glorot_normal",
- layer_type: str = "dense",
+ def _pdfNN_layer_generator(
+ replicas_settings: list[ReplicaSettings],
  flav_info: dict = None,
  fitbasis: str = "NN31IC",
- out: int = 14,
- seed: int = None,
- dropout: float = 0.0,
- regularizer: str = None,
- regularizer_args: dict = None,
+ output_size: int = 14,
  impose_sumrule: str = None,
  scaler: Callable = None,
- num_replicas: int = 1,
  photons: Photon = None,
  replica_axis: bool = True,
- ): # pylint: disable=too-many-locals
+ ):
  """
  Generates the PDF model which takes as input a point in x (from 0 to 1)
  and outputs a basis of 14 PDFs.
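
A hedged usage sketch of the closure attached above: once a fit has produced a multi-replica ``pdf_model``, each member of the ensemble can be rebuilt as a standalone model (variable names are illustrative):

    # Rebuild a standalone model for replica 1 of the batch
    single_model = pdf_model.single_replica_generator(replica_idx=1)
    # Built with replica_axis=False, so the output carries no replica dimension:
    # (1, n_x, 14) rather than (1, n_replicas, n_x, 14)
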
@@ -419,7 +538,6 @@ def pdfNN_layer_generator(
  is the rotation from the fitting basis to the physical basis needed for the
  convolution with the fktables.
 
-
  `layer_type` defines the architecture of the Neural Network, currently
  the following two options are implemented:
  - `dense`
@@ -459,41 +577,35 @@
 
  >>> import numpy as np
  >>> from n3fit.vpinterface import N3PDF
- >>> from n3fit.model_gen import pdfNN_layer_generator
+ >>> from n3fit.model_gen import _pdfNN_layer_generator, ReplicaSettings
  >>> from validphys.pdfgrids import xplotting_grid
- >>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'cbar', 's', 'sbar']]
- >>> fake_x = np.linspace(1e-3,0.8,3)
- >>> pdf_model = pdfNN_layer_generator(nodes=[8], activations=['linear'], seed=[2,3], flav_info=fake_fl, num_replicas=2)
+ >>> rp = [ReplicaSettings(nodes = [8], activations=["linear"], seed=i) for i in [1,2]]
+ >>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'g', 's', 'sbar']]
+ >>> fake_x = np.linspace(1e-3,0.8,3).reshape(1,-1,1)
+ >>> pdf_model = _pdfNN_layer_generator(rp, flav_info=fake_fl, fitbasis='FLAVOUR', impose_sumrule=False)
+ >>> pdf_model(fake_x).shape
+ TensorShape([1, 2, 3, 14])
+
+ # 1 batch, 2 replicas, 3 x points, 14 flavours
+
 
  Parameters
  ----------
- nodes: list(int)
- list of the number of nodes per layer of the PDF NN. Default: [15,8]
- activation: list
- list of activation functions to apply to each layer. Default: ["tanh", "linear"]
- if the number of activation function does not match the number of layers, it will add
- copies of the first activation function found
- initializer_name: str
- selects the initializer of the weights of the NN. Default: glorot_normal
- layer_type: str
- selects the type of architecture of the NN. Default: dense
+ replicas_settings: list(:py:class:`ReplicaSettings`)
+ list of ``ReplicaSettings`` objects holding the settings of each of the replicas
  flav_info: dict
  dictionary containing the information about each PDF (basis dictionary in the runcard)
  to be used by Preprocessing
- out: int
+ fitbasis: str
+ fitbasis used during the fit. Default: NN31IC
+ output_size: int
  number of output flavours of the model (default 14)
- seed: list(int)
- seed to initialize the NN
- dropout: float
- rate of dropout layer by layer
  impose_sumrule: str
  whether to impose sumrules on the output pdf and which one to impose (All, MSR, VSR, TSR)
  scaler: callable
  Function to apply to the input. If given the input to the model
  will be a (1, None, 2) tensor where dim [:,:,0] is scaled
  When None, instead turn the x point into a (x, log(x)) pair
- num_replicas: int
- How many models should be trained in parallel
  photon: :py:class:`validphys.photon.compute.Photon`
  If given, gives the AddPhoton layer a function to compute a photon which will be added at the
  index 0 of the 14-size FK basis
@@ -507,63 +619,44 @@
  pdf_model: n3fit.backends.MetaModel
  a model f(x) = y where x is a tensor (1, xgrid, 1) and y a tensor (1, replicas, xgrid, out)
  """
- # Parse the input configuration
- if seed is None:
- seed = num_replicas * [None]
- elif isinstance(seed, int):
- seed = num_replicas * [seed]
-
- if nodes is None:
- nodes = [15, 8]
- ln = len(nodes)
+ all_seed = [i.seed for i in replicas_settings]
+ num_replicas = len(replicas_settings)
 
  if impose_sumrule is None:
  impose_sumrule = "All"
 
- if activations is None:
- activations = ["tanh", "linear"]
- elif callable(activations):
- # hyperopt passes down a function to generate dynamically the list of
- # activations functions
- activations = activations(ln)
-
- if regularizer_args is None:
- regularizer_args = dict()
-
- # The number of nodes in the last layer is equal to the number of fitted flavours
- last_layer_nodes = nodes[-1] # (== len(flav_info))
-
- # Process input options. There are 2 options:
- # 1. Scale the input
- # 2. Concatenate log(x) to the input
- use_feature_scaling = scaler is not None
-
- # When scaler is active we also want to do the subtraction of large x
- # TODO: make it its own option (i.e., one could want to use this without using scaler)
- subtract_one = use_feature_scaling
+ ## Process the input data (x grid)
+ # There are currently two options:
+ # 1. Append log(x) to the input
+ # 2. Scale the input
+ do_nothing = lambda x: x
+ model_input = {}
 
- # Feature scaling happens before the pdf model and changes x->(scaler(x), x),
- # so it adds an input dimension
- pdf_input_dimensions = 2 if use_feature_scaling else 1
- # Adding of logs happens inside, but before the NN and adds a dimension there
- nn_input_dimensions = 1 if use_feature_scaling else 2
+ if scaler is None: # add log(x)
+ use_feature_scaling = subtract_one = False
+ # The PDF itself receives only x
+ pdf_input_dimensions = 1
+ # But the NN will see (x, log(x))
+ nn_input_dimensions = 2
 
- # Define the main input
- do_nothing = lambda x: x
- if use_feature_scaling:
- pdf_input = Input(shape=(None, pdf_input_dimensions), batch_size=1, name="scaledx_x")
- process_input = do_nothing
- extract_nn_input = Lambda(lambda x: op.op_gather_keep_dims(x, 0, axis=-1), name="x_scaled")
- extract_original = Lambda(lambda x: op.op_gather_keep_dims(x, 1, axis=-1), name="pdf_input")
- else: # add log(x)
  pdf_input = Input(shape=(None, pdf_input_dimensions), batch_size=1, name="pdf_input")
  process_input = Lambda(lambda x: op.concatenate([x, op.op_log(x)], axis=-1), name="x_logx")
  extract_original = do_nothing
  extract_nn_input = do_nothing
+ else:
+ use_feature_scaling = subtract_one = True
+ # The NN will only receive x
+ nn_input_dimensions = 1
+ # But the PDF itself will receive both (x, scaler(x))
+ pdf_input_dimensions = 2
 
- model_input = {"pdf_input": pdf_input}
+ pdf_input = Input(shape=(None, pdf_input_dimensions), batch_size=1, name="scaledx_x")
+ process_input = do_nothing
+ extract_nn_input = Lambda(lambda x: op.op_gather_keep_dims(x, 0, axis=-1), name="x_scaled")
+ extract_original = Lambda(lambda x: op.op_gather_keep_dims(x, 1, axis=-1), name="pdf_input")
 
  if subtract_one:
+ # TODO: make it its own option, even though for now it only activates in the scaler branch above
  input_x_eq_1 = [1.0]
  if use_feature_scaling:
  input_x_eq_1 = scaler(input_x_eq_1)[0]
@@ -572,50 +665,39 @@
  layer_x_eq_1 = op.numpy_to_input(np.array(input_x_eq_1).reshape(1, 1), name="x_eq_1")
  model_input["layer_x_eq_1"] = layer_x_eq_1
 
- # the layer that multiplies the NN output by the preprocessing factor
- apply_preprocessing_factor = Lambda(op.op_multiply, name="prefactor_times_NN")
+ model_input["pdf_input"] = pdf_input
 
- # Photon layer
- layer_photon = AddPhoton(photons=photons, name="add_photon")
+ ## Create the actual NeuralNetwork PDF
+ # loop over the settings for all replicas and generate a list of NNs, one per replica,
+ # which will then be stacked together and built into a single (input -> output) MetaModel
+ # all PDFs _must_ share the same input layer
+ x_input = Input(shape=(None, nn_input_dimensions), batch_size=1, name="NN_input")
 
- # Basis rotation
- basis_rotation = FlavourToEvolution(
- flav_info=flav_info, fitbasis=fitbasis, name="pdf_evolution_basis"
- )
+ list_of_nn_pdfs = []
+ for i, replica_settings in enumerate(replicas_settings):
+ rep_pdf = _generate_nn(x_input, i, **asdict(replica_settings))
+ # And build them all with the same input layer
+ list_of_nn_pdfs.append(rep_pdf(x_input))
 
- # Evolution layer
- layer_evln = FkRotation(output_dim=out, name="pdf_FK_basis")
+ # Stack all replicas together as one single object
+ nn_pdfs = Lambda(lambda nns: op.stack(nns, axis=1), name="stack_replicas")(list_of_nn_pdfs)
+ nn_replicas = MetaModel({'NN_input': x_input}, nn_pdfs, name=NN_LAYER_ALL_REPLICAS)
 
- # Normalization and sum rules
- if impose_sumrule:
- sumrule_layer, integrator_input = generate_msr_model_and_grid(
- fitbasis=fitbasis, mode=impose_sumrule, scaler=scaler, replica_seeds=seed
- )
- model_input["xgrid_integration"] = integrator_input
- else:
- sumrule_layer = lambda x: x
+ ## Preprocessing factors:
+ # the layer that multiplies the NN output by the preprocessing factor
+ # This includes
+ # - x^{a}(1-x)^{b}
+ # - NN(x) - NN(1.0)
+ apply_preprocessing_factor = Lambda(op.op_multiply, name="prefactor_times_NN")
 
  compute_preprocessing_factor = Preprocessing(
  flav_info=flav_info,
  name=PREPROCESSING_LAYER_ALL_REPLICAS,
- replica_seeds=seed,
+ replica_seeds=all_seed,
  large_x=not subtract_one,
  )
 
- nn_replicas = generate_nn(
- layer_type=layer_type,
- nodes_in=nn_input_dimensions,
- nodes=nodes,
- activations=activations,
- initializer_name=initializer_name,
- replica_seeds=seed,
- dropout=dropout,
- regularizer=regularizer,
- regularizer_args=regularizer_args,
- last_layer_nodes=last_layer_nodes,
- )
-
- # The NN subtracted by NN(1), if applicable
+ # The NN subtracted by NN(1), if applicable, otherwise do nothing
  def nn_subtracted(x):
  NNs_x = nn_replicas(x)
@@ -626,6 +708,21 @@
 
  return NNs_x
 
+ ## Unnormalized PDF
+ # updf_r(x) = FkRotation( NN_r(input(x)) * preprocessing_layer_r(x) )
+ # with _r: replica index
+ # input: whatever processing is applied to the input
+ # The preprocessing_layer and weights are specific to each replica
+ # The final PDF will be in the 14-flavour evolution basis used in the FkTables
+
+ # Basis rotation
+ basis_rotation = FlavourToEvolution(
+ flav_info=flav_info, fitbasis=fitbasis, name="pdf_evolution_basis"
+ )
+
+ # Evolution layer
+ layer_evln = FkRotation(output_dim=output_size, name="pdf_FK_basis")
+
  def compute_unnormalized_pdf(x):
  # Preprocess the input grid
  x_nn_input = extract_nn_input(x)
@@ -641,20 +738,31 @@
  # Apply the preprocessing factor
  pref_NNs_x = apply_preprocessing_factor([preprocessing_factors_x, NNs_x])
 
- # Apply basis rotation if needed
+ # Transform to FK basis, this is the full evolution basis
+ # Rotate to the 9f evolution basis first before expanding up to 14f
+ # TODO: make these two steps into one
  if not basis_rotation.is_identity():
  pref_NNs_x = basis_rotation(pref_NNs_x)
-
- # Transform to FK basis
  PDFs_unnormalized = layer_evln(pref_NNs_x)
 
  return PDFs_unnormalized
 
  PDFs_unnormalized = compute_unnormalized_pdf(pdf_input)
 
+ ## Normalization and sum rules, produces normalized PDF
+ # pdf_r(x) = updf_r(x) * Normalization(updf_r(integration_xgrid))
+ # The normalization layer is shared across replicas (but evaluated at each replica separately)
+ #
  if impose_sumrule:
+ sumrule_layer, integrator_input = generate_msr_model_and_grid(
+ fitbasis=fitbasis, mode=impose_sumrule, scaler=scaler, replica_seeds=all_seed
+ )
+ model_input["xgrid_integration"] = integrator_input
+
+ # We need a second unnormalized PDF evaluated on the integrated grid
  PDFs_integration_grid = compute_unnormalized_pdf(integrator_input)
 
+ # Photon contribution to the sum rule
  if photons:
  # add batch and flavor dimensions
  ph_tensor = op.numpy_to_tensor(photons.integral)
@@ -662,7 +770,7 @@
  else:
  photon_integrals = op.numpy_to_tensor(np.zeros((1, num_replicas, 1)))
 
- PDFs_normalized = sumrule_layer(
+ PDFs = sumrule_layer(
  {
  "pdf_x": PDFs_unnormalized,
  "pdf_xgrid_integration": PDFs_integration_grid,
@@ -671,153 +779,119 @@
  "photon_integral": photon_integrals,
  }
  )
- PDFs = PDFs_normalized
  else:
  PDFs = PDFs_unnormalized
+ sumrule_layer = lambda x: x
 
+ ## Include the photon in the PDF for QED-enabled fits
+ # (by default the entry corresponding to the photon is set to 0)
  if photons:
+ layer_photon = AddPhoton(photons=photons, name="add_photon")
  PDFs = layer_photon(PDFs)
 
+ # Return a PDF without a replica axis, to extract single replicas from an ensemble
  if not replica_axis:
  PDFs = Lambda(lambda pdfs: pdfs[:, 0], name="remove_replica_axis")(PDFs)
 
- pdf_model = MetaModel(model_input, PDFs, name=f"PDFs", scaler=scaler)
- return pdf_model
+ return MetaModel(model_input, PDFs, name="PDFs", scaler=scaler)
 
 
- def generate_nn(
- layer_type: str,
- nodes_in: int,
- nodes: list[int],
- activations: list[str],
- initializer_name: str,
- replica_seeds: list[int],
- dropout: float,
- regularizer: str,
- regularizer_args: dict,
- last_layer_nodes: int,
+ # TODO: is there a way of keeping the input of this function synchronized with ReplicaSettings
+ # beyond a test of it? In principle we might want to have the arguments explicitly here...
+ def _generate_nn(
+ input_layer: Input,
+ replica_idx: int = 0,
+ seed: int = None,
+ nodes: list[int] = None,
+ activations: list[str] = None,
+ architecture: str = "dense",
+ initializer: str = None,
+ dropout_rate: float = 0.0,
+ regularizer: str = None,
+ regularizer_args: dict = field(default_factory=dict),
  ) -> MetaModel:
  """
- Create the part of the model that contains all of the actual neural network
- layers, for each replica.
+ Create a Neural Network according to the input settings
 
  Parameters
  ----------
- layer_type: str
- Type of layer to use. Can be "dense" or "dense_per_flavour".
- nodes_in: int
- Number of nodes in the input layer.
- nodes: List[int]
- Number of nodes in each hidden layer.
- activations: List[str]
- Activation function to use in each hidden layer.
- initializer_name: str
- Name of the initializer to use.
- replica_seeds: List[int]
- List of seeds to use for each replica.
- dropout: float
- Dropout rate to use (if 0, no dropout is used).
- regularizer: str
- Name of the regularizer to use.
- regularizer_args: dict
- Arguments to pass to the regularizer.
- last_layer_nodes: int
- Number of nodes in the last layer.
+ input_layer: :py:class:`n3fit.backends.Input`
+ input layer of the replica
+ replica_idx: int
+ Index of the replica used to name the PDF
+
+ All other arguments follow exactly the documentation
+ of ``ReplicaSettings``.
+ See :py:class:`n3fit.model_gen.ReplicaSettings`
+
 
  Returns
  -------
- nn_replicas: MetaModel
- Single model containing all replicas.
+ nn_pdf: MetaModel
+ A single PDF NN model
  """
- nodes_list = list(nodes) # so we can modify it
- x_input = Input(shape=(None, nodes_in), batch_size=1, name="NN_input")
  reg = regularizer_selector(regularizer, **regularizer_args)
+ *hidden_layers, n_flavours = nodes
+
+ # Preparatory step: prepare a ``layer_generator`` function to iteratively create all layers
+ # TODO: create a factory of layers instead of an ugly function
+ # this layer generator takes the index of the layer (useful for seeding)
+ # the output nodes of the layer
+ # and the activation function
 
- if layer_type == "dense_per_flavour":
- # set the arguments that will define the layer
- # but careful, the last layer must be nodes = 1
- # TODO the mismatch is due to the fact that basis_size
- # is set to the number of nodes of the last layer when it should
- # come from the runcard
- nodes_list[-1] = 1
- basis_size = last_layer_nodes
+ if architecture == "dense_per_flavour":
+ # Reset the last node in the list to be 1, we will then
+ # repeat it n-times
+ nodes = hidden_layers + [1]
 
  def layer_generator(i_layer, nodes_out, activation):
  """Generate the ``i_layer``-th dense_per_flavour layer for all replicas."""
- layers = []
- for replica_seed in replica_seeds:
- seed = int(replica_seed + i_layer * basis_size)
- initializers = [
- MetaLayer.select_initializer(initializer_name, seed=seed + b)
- for b in range(basis_size)
- ]
- layer = base_layer_selector(
- layer_type,
- kernel_initializer=initializers,
- units=int(nodes_out),
- activation=activation,
- basis_size=basis_size,
- )
- layers.append(layer)
-
- return layers
-
- elif layer_type == "dense":
-
- def initializer_generator(seed, i_layer):
- seed += i_layer
- return MetaLayer.select_initializer(initializer_name, seed=int(seed))
+ l_seed = int(seed + i_layer * n_flavours)
+ initializers = [
+ MetaLayer.select_initializer(initializer, seed=l_seed + b)
+ for b in range(n_flavours)
+ ]
+ layer = base_layer_selector(
+ architecture,
+ kernel_initializer=initializers,
+ units=int(nodes_out),
+ activation=activation,
+ basis_size=n_flavours,
+ )
+ return layer
+
+ elif architecture == "dense":
 
  def layer_generator(i_layer, nodes_out, activation):
- layers = []
- for replica_seed in replica_seeds:
- layers.append(
- base_layer_selector(
- layer_type,
- kernel_initializer=initializer_generator(replica_seed, i_layer),
- units=nodes_out,
- activation=activation,
- regularizer=reg,
- )
- )
- return layers
+ kini = MetaLayer.select_initializer(initializer, seed=int(seed + i_layer))
+ return base_layer_selector(
+ architecture,
+ kernel_initializer=kini,
+ units=nodes_out,
+ activation=activation,
+ regularizer=reg,
+ )
 
  else:
- raise ValueError(f"{layer_type=} not recognized during model generation")
-
- # First create all the layers
- # list_of_pdf_layers[d][r] is the layer at depth d for replica r
- list_of_pdf_layers = []
- for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
- layers = layer_generator(i_layer, nodes_out, activation)
- list_of_pdf_layers.append(layers)
- nodes_in = int(nodes_out)
-
- # add dropout as second to last layer
- if dropout > 0:
- dropout_layer = base_layer_selector("dropout", rate=dropout)
- list_of_pdf_layers.insert(-2, dropout_layer)
-
- # In case of per flavour network, concatenate at the last layer
- if layer_type == "dense_per_flavour":
- concat = base_layer_selector("concatenate")
- list_of_pdf_layers[-1] = [lambda x: concat(layer(x)) for layer in list_of_pdf_layers[-1]]
+ raise ValueError(f"{architecture=} not recognized during model generation")
 
- pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
+ # Use the previous layer generator to generate all layers
+ previous_layer = input_layer
+ for layer_idx, (nodes_out, activation) in enumerate(zip(nodes, activations)):
+ layer = layer_generator(layer_idx, nodes_out, activation)
 
- for layers in list_of_pdf_layers[1:]:
- # Since some layers (dropout) are shared, we have to treat them separately
- if type(layers) is list:
- pdfs = [layer(x) for layer, x in zip(layers, pdfs)]
- else:
- pdfs = [layers(x) for x in pdfs]
+ # Apply the layer to the output of the previous one
+ previous_layer = layer(previous_layer)
+
+ # Add dropout, if any, to the second-to-last layer
+ if dropout_rate > 0 and layer_idx == (len(hidden_layers) - 2):
+ dropout_l = base_layer_selector("dropout", rate=dropout_rate)
+ previous_layer = dropout_l(previous_layer)
 
- # Wrap the pdfs in a MetaModel to enable getting/setting of weights later
- pdfs = [
- MetaModel({'NN_input': x_input}, pdf, name=f"{NN_PREFIX}_{i_replica}")(x_input)
- for i_replica, pdf in enumerate(pdfs)
- ]
- pdfs = Lambda(lambda nns: op.stack(nns, axis=1), name=f"stack_replicas")(pdfs)
- model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS)
+ # In a dense-per-flavour, concatenate the last layer
+ if architecture == "dense_per_flavour":
+ concat = base_layer_selector("concatenate")
+ previous_layer = concat(previous_layer)
 
- return model
+ # Return the PDF model
+ return MetaModel({"NN_input": input_layer}, previous_layer, name=f"{NN_PREFIX}_{replica_idx}")
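
Putting the refactored pieces together, a sketch of how an ensemble would now be requested through the public entry point ``generate_pdf_model``, reusing the fake flavour basis from the ``_pdfNN_layer_generator`` doctest above; everything not shown falls back to the defaults in the new signatures:

    from n3fit.model_gen import ReplicaSettings, generate_pdf_model

    fake_fl = [
        {'fl': fl, 'largex': [0, 1], 'smallx': [1, 2]}
        for fl in ['u', 'ubar', 'd', 'dbar', 'c', 'g', 's', 'sbar']
    ]
    # One ReplicaSettings per desired replica; the last entry of ``nodes``
    # must equal the number of fitted flavours (here 8)
    settings = [
        ReplicaSettings(seed=i, nodes=[15, 8], activations=["tanh", "linear"])
        for i in (1, 2)
    ]
    pdf_model = generate_pdf_model(settings, flav_info=fake_fl, impose_sumrule="All")
    # Evaluated on an x grid (plus the integration grid added by the sum rule),
    # the model returns a tensor of shape (1, n_replicas, n_x, 14)

The per-replica models can then be recovered after training through the attached ``pdf_model.single_replica_generator``.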